1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/param.h> 33 #include <sys/devctl.h> 34 #include <sys/malloc.h> 35 #include <sys/queue.h> 36 #include <sys/refcount.h> 37 #include <sys/jail.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/loginclass.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/racct.h> 44 #include <sys/rctl.h> 45 #include <sys/resourcevar.h> 46 #include <sys/sx.h> 47 #include <sys/sysproto.h> 48 #include <sys/systm.h> 49 #include <sys/types.h> 50 #include <sys/eventhandler.h> 51 #include <sys/lock.h> 52 #include <sys/mutex.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/taskqueue.h> 56 #include <sys/tree.h> 57 #include <vm/uma.h> 58 59 #ifdef RCTL 60 #ifndef RACCT 61 #error "The RCTL option requires the RACCT option" 62 #endif 63 64 FEATURE(rctl, "Resource Limits"); 65 66 #define HRF_DEFAULT 0 67 #define HRF_DONT_INHERIT 1 68 #define HRF_DONT_ACCUMULATE 2 69 70 #define RCTL_MAX_INBUFSIZE 4 * 1024 71 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 72 #define RCTL_LOG_BUFSIZE 128 73 74 #define RCTL_PCPU_SHIFT (10 * 1000000) 75 76 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 77 static int rctl_log_rate_limit = 10; 78 static int rctl_devctl_rate_limit = 10; 79 80 /* 81 * Values below are initialized in rctl_init(). 82 */ 83 static int rctl_throttle_min = -1; 84 static int rctl_throttle_max = -1; 85 static int rctl_throttle_pct = -1; 86 static int rctl_throttle_pct2 = -1; 87 88 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 89 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 90 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 91 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 92 93 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 94 "Resource Limits"); 95 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 96 &rctl_maxbufsize, 0, "Maximum output buffer size"); 97 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 98 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 99 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 100 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 101 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 102 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 103 &rctl_throttle_min_sysctl, "IU", 104 "Shortest throttling duration, in hz"); 105 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 107 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 108 &rctl_throttle_max_sysctl, "IU", 109 "Longest throttling duration, in hz"); 110 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 111 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 112 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 113 &rctl_throttle_pct_sysctl, "IU", 114 "Throttling penalty for process consumption, in percent"); 115 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 117 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 118 &rctl_throttle_pct2_sysctl, "IU", 119 "Throttling penalty for container consumption, in percent"); 120 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 121 122 /* 123 * 'rctl_rule_link' connects a rule with every racct it's related to. 124 * For example, rule 'user:X:openfiles:deny=N/process' is linked 125 * with uidinfo for user X, and to each process of that user. 126 */ 127 struct rctl_rule_link { 128 LIST_ENTRY(rctl_rule_link) rrl_next; 129 struct rctl_rule *rrl_rule; 130 int rrl_exceeded; 131 }; 132 133 struct dict { 134 const char *d_name; 135 int d_value; 136 }; 137 138 static struct dict subjectnames[] = { 139 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 140 { "user", RCTL_SUBJECT_TYPE_USER }, 141 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 142 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 143 { NULL, -1 }}; 144 145 static struct dict resourcenames[] = { 146 { "cputime", RACCT_CPU }, 147 { "datasize", RACCT_DATA }, 148 { "stacksize", RACCT_STACK }, 149 { "coredumpsize", RACCT_CORE }, 150 { "memoryuse", RACCT_RSS }, 151 { "memorylocked", RACCT_MEMLOCK }, 152 { "maxproc", RACCT_NPROC }, 153 { "openfiles", RACCT_NOFILE }, 154 { "vmemoryuse", RACCT_VMEM }, 155 { "pseudoterminals", RACCT_NPTS }, 156 { "swapuse", RACCT_SWAP }, 157 { "nthr", RACCT_NTHR }, 158 { "msgqqueued", RACCT_MSGQQUEUED }, 159 { "msgqsize", RACCT_MSGQSIZE }, 160 { "nmsgq", RACCT_NMSGQ }, 161 { "nsem", RACCT_NSEM }, 162 { "nsemop", RACCT_NSEMOP }, 163 { "nshm", RACCT_NSHM }, 164 { "shmsize", RACCT_SHMSIZE }, 165 { "wallclock", RACCT_WALLCLOCK }, 166 { "pcpu", RACCT_PCTCPU }, 167 { "readbps", RACCT_READBPS }, 168 { "writebps", RACCT_WRITEBPS }, 169 { "readiops", RACCT_READIOPS }, 170 { "writeiops", RACCT_WRITEIOPS }, 171 { NULL, -1 }}; 172 173 static struct dict actionnames[] = { 174 { "sighup", RCTL_ACTION_SIGHUP }, 175 { "sigint", RCTL_ACTION_SIGINT }, 176 { "sigquit", RCTL_ACTION_SIGQUIT }, 177 { "sigill", RCTL_ACTION_SIGILL }, 178 { "sigtrap", RCTL_ACTION_SIGTRAP }, 179 { "sigabrt", RCTL_ACTION_SIGABRT }, 180 { "sigemt", RCTL_ACTION_SIGEMT }, 181 { "sigfpe", RCTL_ACTION_SIGFPE }, 182 { "sigkill", RCTL_ACTION_SIGKILL }, 183 { "sigbus", RCTL_ACTION_SIGBUS }, 184 { "sigsegv", RCTL_ACTION_SIGSEGV }, 185 { "sigsys", RCTL_ACTION_SIGSYS }, 186 { "sigpipe", RCTL_ACTION_SIGPIPE }, 187 { "sigalrm", RCTL_ACTION_SIGALRM }, 188 { "sigterm", RCTL_ACTION_SIGTERM }, 189 { "sigurg", RCTL_ACTION_SIGURG }, 190 { "sigstop", RCTL_ACTION_SIGSTOP }, 191 { "sigtstp", RCTL_ACTION_SIGTSTP }, 192 { "sigchld", RCTL_ACTION_SIGCHLD }, 193 { "sigttin", RCTL_ACTION_SIGTTIN }, 194 { "sigttou", RCTL_ACTION_SIGTTOU }, 195 { "sigio", RCTL_ACTION_SIGIO }, 196 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 197 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 198 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 199 { "sigprof", RCTL_ACTION_SIGPROF }, 200 { "sigwinch", RCTL_ACTION_SIGWINCH }, 201 { "siginfo", RCTL_ACTION_SIGINFO }, 202 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 203 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 204 { "sigthr", RCTL_ACTION_SIGTHR }, 205 { "deny", RCTL_ACTION_DENY }, 206 { "log", RCTL_ACTION_LOG }, 207 { "devctl", RCTL_ACTION_DEVCTL }, 208 { "throttle", RCTL_ACTION_THROTTLE }, 209 { NULL, -1 }}; 210 211 static void rctl_init(void); 212 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 213 214 static uma_zone_t rctl_rule_zone; 215 static uma_zone_t rctl_rule_link_zone; 216 217 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 218 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 219 220 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 221 222 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 223 { 224 int error, val = rctl_throttle_min; 225 226 error = sysctl_handle_int(oidp, &val, 0, req); 227 if (error || !req->newptr) 228 return (error); 229 if (val < 1 || val > rctl_throttle_max) 230 return (EINVAL); 231 232 RACCT_LOCK(); 233 rctl_throttle_min = val; 234 RACCT_UNLOCK(); 235 236 return (0); 237 } 238 239 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 240 { 241 int error, val = rctl_throttle_max; 242 243 error = sysctl_handle_int(oidp, &val, 0, req); 244 if (error || !req->newptr) 245 return (error); 246 if (val < rctl_throttle_min) 247 return (EINVAL); 248 249 RACCT_LOCK(); 250 rctl_throttle_max = val; 251 RACCT_UNLOCK(); 252 253 return (0); 254 } 255 256 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 257 { 258 int error, val = rctl_throttle_pct; 259 260 error = sysctl_handle_int(oidp, &val, 0, req); 261 if (error || !req->newptr) 262 return (error); 263 if (val < 0) 264 return (EINVAL); 265 266 RACCT_LOCK(); 267 rctl_throttle_pct = val; 268 RACCT_UNLOCK(); 269 270 return (0); 271 } 272 273 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 274 { 275 int error, val = rctl_throttle_pct2; 276 277 error = sysctl_handle_int(oidp, &val, 0, req); 278 if (error || !req->newptr) 279 return (error); 280 if (val < 0) 281 return (EINVAL); 282 283 RACCT_LOCK(); 284 rctl_throttle_pct2 = val; 285 RACCT_UNLOCK(); 286 287 return (0); 288 } 289 290 static const char * 291 rctl_subject_type_name(int subject) 292 { 293 int i; 294 295 for (i = 0; subjectnames[i].d_name != NULL; i++) { 296 if (subjectnames[i].d_value == subject) 297 return (subjectnames[i].d_name); 298 } 299 300 panic("rctl_subject_type_name: unknown subject type %d", subject); 301 } 302 303 static const char * 304 rctl_action_name(int action) 305 { 306 int i; 307 308 for (i = 0; actionnames[i].d_name != NULL; i++) { 309 if (actionnames[i].d_value == action) 310 return (actionnames[i].d_name); 311 } 312 313 panic("rctl_action_name: unknown action %d", action); 314 } 315 316 const char * 317 rctl_resource_name(int resource) 318 { 319 int i; 320 321 for (i = 0; resourcenames[i].d_name != NULL; i++) { 322 if (resourcenames[i].d_value == resource) 323 return (resourcenames[i].d_name); 324 } 325 326 panic("rctl_resource_name: unknown resource %d", resource); 327 } 328 329 static struct racct * 330 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 331 { 332 struct ucred *cred = p->p_ucred; 333 334 ASSERT_RACCT_ENABLED(); 335 RACCT_LOCK_ASSERT(); 336 337 switch (rule->rr_per) { 338 case RCTL_SUBJECT_TYPE_PROCESS: 339 return (p->p_racct); 340 case RCTL_SUBJECT_TYPE_USER: 341 return (cred->cr_ruidinfo->ui_racct); 342 case RCTL_SUBJECT_TYPE_LOGINCLASS: 343 return (cred->cr_loginclass->lc_racct); 344 case RCTL_SUBJECT_TYPE_JAIL: 345 return (cred->cr_prison->pr_prison_racct->prr_racct); 346 default: 347 panic("%s: unknown per %d", __func__, rule->rr_per); 348 } 349 } 350 351 /* 352 * Return the amount of resource that can be allocated by 'p' before 353 * hitting 'rule'. 354 */ 355 static int64_t 356 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 357 { 358 const struct racct *racct; 359 int64_t available; 360 361 ASSERT_RACCT_ENABLED(); 362 RACCT_LOCK_ASSERT(); 363 364 racct = rctl_proc_rule_to_racct(p, rule); 365 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 366 367 return (available); 368 } 369 370 /* 371 * Called every second for proc, uidinfo, loginclass, and jail containers. 372 * If the limit isn't exceeded, it decreases the usage amount to zero. 373 * Otherwise, it decreases it by the value of the limit. This way 374 * resource consumption exceeding the limit "carries over" to the next 375 * period. 376 */ 377 void 378 rctl_throttle_decay(struct racct *racct, int resource) 379 { 380 struct rctl_rule *rule; 381 struct rctl_rule_link *link; 382 int64_t minavailable; 383 384 ASSERT_RACCT_ENABLED(); 385 RACCT_LOCK_ASSERT(); 386 387 minavailable = INT64_MAX; 388 389 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 390 rule = link->rrl_rule; 391 392 if (rule->rr_resource != resource) 393 continue; 394 if (rule->rr_action != RCTL_ACTION_THROTTLE) 395 continue; 396 397 if (rule->rr_amount < minavailable) 398 minavailable = rule->rr_amount; 399 } 400 401 if (racct->r_resources[resource] < minavailable) { 402 racct->r_resources[resource] = 0; 403 } else { 404 /* 405 * Cap utilization counter at ten times the limit. Otherwise, 406 * if we changed the rule lowering the allowed amount, it could 407 * take unreasonably long time for the accumulated resource 408 * usage to drop. 409 */ 410 if (racct->r_resources[resource] > minavailable * 10) 411 racct->r_resources[resource] = minavailable * 10; 412 413 racct->r_resources[resource] -= minavailable; 414 } 415 } 416 417 /* 418 * Special version of rctl_get_available() for the %CPU resource. 419 * We slightly cheat here and return less than we normally would. 420 */ 421 int64_t 422 rctl_pcpu_available(const struct proc *p) { 423 struct rctl_rule *rule; 424 struct rctl_rule_link *link; 425 int64_t available, minavailable, limit; 426 427 ASSERT_RACCT_ENABLED(); 428 RACCT_LOCK_ASSERT(); 429 430 minavailable = INT64_MAX; 431 limit = 0; 432 433 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 434 rule = link->rrl_rule; 435 if (rule->rr_resource != RACCT_PCTCPU) 436 continue; 437 if (rule->rr_action != RCTL_ACTION_DENY) 438 continue; 439 available = rctl_available_resource(p, rule); 440 if (available < minavailable) { 441 minavailable = available; 442 limit = rule->rr_amount; 443 } 444 } 445 446 /* 447 * Return slightly less than actual value of the available 448 * %cpu resource. This makes %cpu throttling more aggressive 449 * and lets us act sooner than the limits are already exceeded. 450 */ 451 if (limit != 0) { 452 if (limit > 2 * RCTL_PCPU_SHIFT) 453 minavailable -= RCTL_PCPU_SHIFT; 454 else 455 minavailable -= (limit / 2); 456 } 457 458 return (minavailable); 459 } 460 461 static uint64_t 462 xadd(uint64_t a, uint64_t b) 463 { 464 uint64_t c; 465 466 c = a + b; 467 468 /* 469 * Detect overflow. 470 */ 471 if (c < a || c < b) 472 return (UINT64_MAX); 473 474 return (c); 475 } 476 477 static uint64_t 478 xmul(uint64_t a, uint64_t b) 479 { 480 481 if (b != 0 && a > UINT64_MAX / b) 482 return (UINT64_MAX); 483 484 return (a * b); 485 } 486 487 /* 488 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 489 * to what it keeps allocated now. Returns non-zero if the allocation should 490 * be denied, 0 otherwise. 491 */ 492 int 493 rctl_enforce(struct proc *p, int resource, uint64_t amount) 494 { 495 static struct timeval log_lasttime, devctl_lasttime; 496 static int log_curtime = 0, devctl_curtime = 0; 497 struct rctl_rule *rule; 498 struct rctl_rule_link *link; 499 struct sbuf sb; 500 char *buf; 501 int64_t available; 502 uint64_t sleep_ms, sleep_ratio; 503 int should_deny = 0; 504 505 ASSERT_RACCT_ENABLED(); 506 RACCT_LOCK_ASSERT(); 507 508 /* 509 * There may be more than one matching rule; go through all of them. 510 * Denial should be done last, after logging and sending signals. 511 */ 512 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 513 rule = link->rrl_rule; 514 if (rule->rr_resource != resource) 515 continue; 516 517 available = rctl_available_resource(p, rule); 518 if (available >= (int64_t)amount) { 519 link->rrl_exceeded = 0; 520 continue; 521 } 522 523 switch (rule->rr_action) { 524 case RCTL_ACTION_DENY: 525 should_deny = 1; 526 continue; 527 case RCTL_ACTION_LOG: 528 /* 529 * If rrl_exceeded != 0, it means we've already 530 * logged a warning for this process. 531 */ 532 if (link->rrl_exceeded != 0) 533 continue; 534 535 /* 536 * If the process state is not fully initialized yet, 537 * we can't access most of the required fields, e.g. 538 * p->p_comm. This happens when called from fork1(). 539 * Ignore this rule for now; it will be processed just 540 * after fork, when called from racct_proc_fork_done(). 541 */ 542 if (p->p_state != PRS_NORMAL) 543 continue; 544 545 if (!ppsratecheck(&log_lasttime, &log_curtime, 546 rctl_log_rate_limit)) 547 continue; 548 549 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 550 if (buf == NULL) { 551 printf("rctl_enforce: out of memory\n"); 552 continue; 553 } 554 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 555 rctl_rule_to_sbuf(&sb, rule); 556 sbuf_finish(&sb); 557 printf("rctl: rule \"%s\" matched by pid %d " 558 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 559 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 560 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 561 sbuf_delete(&sb); 562 free(buf, M_RCTL); 563 link->rrl_exceeded = 1; 564 continue; 565 case RCTL_ACTION_DEVCTL: 566 if (link->rrl_exceeded != 0) 567 continue; 568 569 if (p->p_state != PRS_NORMAL) 570 continue; 571 572 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 573 rctl_devctl_rate_limit)) 574 continue; 575 576 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 577 if (buf == NULL) { 578 printf("rctl_enforce: out of memory\n"); 579 continue; 580 } 581 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 582 sbuf_printf(&sb, "rule="); 583 rctl_rule_to_sbuf(&sb, rule); 584 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 585 p->p_pid, p->p_ucred->cr_ruid, 586 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 587 sbuf_finish(&sb); 588 devctl_notify("RCTL", "rule", "matched", 589 sbuf_data(&sb)); 590 sbuf_delete(&sb); 591 free(buf, M_RCTL); 592 link->rrl_exceeded = 1; 593 continue; 594 case RCTL_ACTION_THROTTLE: 595 if (p->p_state != PRS_NORMAL) 596 continue; 597 598 if (rule->rr_amount == 0) { 599 racct_proc_throttle(p, rctl_throttle_max); 600 continue; 601 } 602 603 /* 604 * Make the process sleep for a fraction of second 605 * proportional to the ratio of process' resource 606 * utilization compared to the limit. The point is 607 * to penalize resource hogs: processes that consume 608 * more of the available resources sleep for longer. 609 * 610 * We're trying to defer division until the very end, 611 * to minimize the rounding effects. The following 612 * calculation could have been written in a clearer 613 * way like this: 614 * 615 * sleep_ms = hz * p->p_racct->r_resources[resource] / 616 * rule->rr_amount; 617 * sleep_ms *= rctl_throttle_pct / 100; 618 * if (sleep_ms < rctl_throttle_min) 619 * sleep_ms = rctl_throttle_min; 620 * 621 */ 622 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 623 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 624 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 625 sleep_ms = rctl_throttle_min * rule->rr_amount; 626 627 /* 628 * Multiply that by the ratio of the resource 629 * consumption for the container compared to the limit, 630 * squared. In other words, a process in a container 631 * that is two times over the limit will be throttled 632 * four times as much for hitting the same rule. The 633 * point is to penalize processes more if the container 634 * itself (eg certain UID or jail) is above the limit. 635 */ 636 if (available < 0) 637 sleep_ratio = -available / rule->rr_amount; 638 else 639 sleep_ratio = 0; 640 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 641 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 642 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 643 644 /* 645 * Finally the division. 646 */ 647 sleep_ms /= rule->rr_amount; 648 649 if (sleep_ms > rctl_throttle_max) 650 sleep_ms = rctl_throttle_max; 651 #if 0 652 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 653 __func__, p->p_pid, p->p_comm, 654 p->p_racct->r_resources[resource], 655 rule->rr_amount, (uintmax_t)sleep_ms, 656 (uintmax_t)sleep_ratio, (intmax_t)available); 657 #endif 658 659 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 660 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 661 racct_proc_throttle(p, sleep_ms); 662 continue; 663 default: 664 if (link->rrl_exceeded != 0) 665 continue; 666 667 if (p->p_state != PRS_NORMAL) 668 continue; 669 670 KASSERT(rule->rr_action > 0 && 671 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 672 ("rctl_enforce: unknown action %d", 673 rule->rr_action)); 674 675 /* 676 * We're using the fact that RCTL_ACTION_SIG* values 677 * are equal to their counterparts from sys/signal.h. 678 */ 679 kern_psignal(p, rule->rr_action); 680 link->rrl_exceeded = 1; 681 continue; 682 } 683 } 684 685 if (should_deny) { 686 /* 687 * Return fake error code; the caller should change it 688 * into one proper for the situation - EFSIZ, ENOMEM etc. 689 */ 690 return (EDOOFUS); 691 } 692 693 return (0); 694 } 695 696 uint64_t 697 rctl_get_limit(struct proc *p, int resource) 698 { 699 struct rctl_rule *rule; 700 struct rctl_rule_link *link; 701 uint64_t amount = UINT64_MAX; 702 703 ASSERT_RACCT_ENABLED(); 704 RACCT_LOCK_ASSERT(); 705 706 /* 707 * There may be more than one matching rule; go through all of them. 708 * Denial should be done last, after logging and sending signals. 709 */ 710 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 711 rule = link->rrl_rule; 712 if (rule->rr_resource != resource) 713 continue; 714 if (rule->rr_action != RCTL_ACTION_DENY) 715 continue; 716 if (rule->rr_amount < amount) 717 amount = rule->rr_amount; 718 } 719 720 return (amount); 721 } 722 723 uint64_t 724 rctl_get_available(struct proc *p, int resource) 725 { 726 struct rctl_rule *rule; 727 struct rctl_rule_link *link; 728 int64_t available, minavailable, allocated; 729 730 minavailable = INT64_MAX; 731 732 ASSERT_RACCT_ENABLED(); 733 RACCT_LOCK_ASSERT(); 734 735 /* 736 * There may be more than one matching rule; go through all of them. 737 * Denial should be done last, after logging and sending signals. 738 */ 739 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 740 rule = link->rrl_rule; 741 if (rule->rr_resource != resource) 742 continue; 743 if (rule->rr_action != RCTL_ACTION_DENY) 744 continue; 745 available = rctl_available_resource(p, rule); 746 if (available < minavailable) 747 minavailable = available; 748 } 749 750 /* 751 * XXX: Think about this _hard_. 752 */ 753 allocated = p->p_racct->r_resources[resource]; 754 if (minavailable < INT64_MAX - allocated) 755 minavailable += allocated; 756 if (minavailable < 0) 757 minavailable = 0; 758 759 return (minavailable); 760 } 761 762 static int 763 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 764 { 765 766 ASSERT_RACCT_ENABLED(); 767 768 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 769 if (rule->rr_subject_type != filter->rr_subject_type) 770 return (0); 771 772 switch (filter->rr_subject_type) { 773 case RCTL_SUBJECT_TYPE_PROCESS: 774 if (filter->rr_subject.rs_proc != NULL && 775 rule->rr_subject.rs_proc != 776 filter->rr_subject.rs_proc) 777 return (0); 778 break; 779 case RCTL_SUBJECT_TYPE_USER: 780 if (filter->rr_subject.rs_uip != NULL && 781 rule->rr_subject.rs_uip != 782 filter->rr_subject.rs_uip) 783 return (0); 784 break; 785 case RCTL_SUBJECT_TYPE_LOGINCLASS: 786 if (filter->rr_subject.rs_loginclass != NULL && 787 rule->rr_subject.rs_loginclass != 788 filter->rr_subject.rs_loginclass) 789 return (0); 790 break; 791 case RCTL_SUBJECT_TYPE_JAIL: 792 if (filter->rr_subject.rs_prison_racct != NULL && 793 rule->rr_subject.rs_prison_racct != 794 filter->rr_subject.rs_prison_racct) 795 return (0); 796 break; 797 default: 798 panic("rctl_rule_matches: unknown subject type %d", 799 filter->rr_subject_type); 800 } 801 } 802 803 if (filter->rr_resource != RACCT_UNDEFINED) { 804 if (rule->rr_resource != filter->rr_resource) 805 return (0); 806 } 807 808 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 809 if (rule->rr_action != filter->rr_action) 810 return (0); 811 } 812 813 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 814 if (rule->rr_amount != filter->rr_amount) 815 return (0); 816 } 817 818 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 819 if (rule->rr_per != filter->rr_per) 820 return (0); 821 } 822 823 return (1); 824 } 825 826 static int 827 str2value(const char *str, int *value, struct dict *table) 828 { 829 int i; 830 831 if (value == NULL) 832 return (EINVAL); 833 834 for (i = 0; table[i].d_name != NULL; i++) { 835 if (strcasecmp(table[i].d_name, str) == 0) { 836 *value = table[i].d_value; 837 return (0); 838 } 839 } 840 841 return (EINVAL); 842 } 843 844 static int 845 str2id(const char *str, id_t *value) 846 { 847 char *end; 848 849 if (str == NULL) 850 return (EINVAL); 851 852 *value = strtoul(str, &end, 10); 853 if ((size_t)(end - str) != strlen(str)) 854 return (EINVAL); 855 856 return (0); 857 } 858 859 static int 860 str2int64(const char *str, int64_t *value) 861 { 862 char *end; 863 864 if (str == NULL) 865 return (EINVAL); 866 867 *value = strtoul(str, &end, 10); 868 if ((size_t)(end - str) != strlen(str)) 869 return (EINVAL); 870 871 if (*value < 0) 872 return (ERANGE); 873 874 return (0); 875 } 876 877 /* 878 * Connect the rule to the racct, increasing refcount for the rule. 879 */ 880 static void 881 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 882 { 883 struct rctl_rule_link *link; 884 885 ASSERT_RACCT_ENABLED(); 886 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 887 888 rctl_rule_acquire(rule); 889 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 890 link->rrl_rule = rule; 891 link->rrl_exceeded = 0; 892 893 RACCT_LOCK(); 894 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 895 RACCT_UNLOCK(); 896 } 897 898 static int 899 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 900 { 901 struct rctl_rule_link *link; 902 903 ASSERT_RACCT_ENABLED(); 904 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 905 RACCT_LOCK_ASSERT(); 906 907 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 908 if (link == NULL) 909 return (ENOMEM); 910 rctl_rule_acquire(rule); 911 link->rrl_rule = rule; 912 link->rrl_exceeded = 0; 913 914 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 915 916 return (0); 917 } 918 919 /* 920 * Remove limits for a rules matching the filter and release 921 * the refcounts for the rules, possibly freeing them. Returns 922 * the number of limit structures removed. 923 */ 924 static int 925 rctl_racct_remove_rules(struct racct *racct, 926 const struct rctl_rule *filter) 927 { 928 struct rctl_rule_link *link, *linktmp; 929 int removed = 0; 930 931 ASSERT_RACCT_ENABLED(); 932 RACCT_LOCK_ASSERT(); 933 934 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 935 if (!rctl_rule_matches(link->rrl_rule, filter)) 936 continue; 937 938 LIST_REMOVE(link, rrl_next); 939 rctl_rule_release(link->rrl_rule); 940 uma_zfree(rctl_rule_link_zone, link); 941 removed++; 942 } 943 return (removed); 944 } 945 946 static void 947 rctl_rule_acquire_subject(struct rctl_rule *rule) 948 { 949 950 ASSERT_RACCT_ENABLED(); 951 952 switch (rule->rr_subject_type) { 953 case RCTL_SUBJECT_TYPE_UNDEFINED: 954 case RCTL_SUBJECT_TYPE_PROCESS: 955 break; 956 case RCTL_SUBJECT_TYPE_JAIL: 957 if (rule->rr_subject.rs_prison_racct != NULL) 958 prison_racct_hold(rule->rr_subject.rs_prison_racct); 959 break; 960 case RCTL_SUBJECT_TYPE_USER: 961 if (rule->rr_subject.rs_uip != NULL) 962 uihold(rule->rr_subject.rs_uip); 963 break; 964 case RCTL_SUBJECT_TYPE_LOGINCLASS: 965 if (rule->rr_subject.rs_loginclass != NULL) 966 loginclass_hold(rule->rr_subject.rs_loginclass); 967 break; 968 default: 969 panic("rctl_rule_acquire_subject: unknown subject type %d", 970 rule->rr_subject_type); 971 } 972 } 973 974 static void 975 rctl_rule_release_subject(struct rctl_rule *rule) 976 { 977 978 ASSERT_RACCT_ENABLED(); 979 980 switch (rule->rr_subject_type) { 981 case RCTL_SUBJECT_TYPE_UNDEFINED: 982 case RCTL_SUBJECT_TYPE_PROCESS: 983 break; 984 case RCTL_SUBJECT_TYPE_JAIL: 985 if (rule->rr_subject.rs_prison_racct != NULL) 986 prison_racct_free(rule->rr_subject.rs_prison_racct); 987 break; 988 case RCTL_SUBJECT_TYPE_USER: 989 if (rule->rr_subject.rs_uip != NULL) 990 uifree(rule->rr_subject.rs_uip); 991 break; 992 case RCTL_SUBJECT_TYPE_LOGINCLASS: 993 if (rule->rr_subject.rs_loginclass != NULL) 994 loginclass_free(rule->rr_subject.rs_loginclass); 995 break; 996 default: 997 panic("rctl_rule_release_subject: unknown subject type %d", 998 rule->rr_subject_type); 999 } 1000 } 1001 1002 struct rctl_rule * 1003 rctl_rule_alloc(int flags) 1004 { 1005 struct rctl_rule *rule; 1006 1007 ASSERT_RACCT_ENABLED(); 1008 1009 rule = uma_zalloc(rctl_rule_zone, flags); 1010 if (rule == NULL) 1011 return (NULL); 1012 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1013 rule->rr_subject.rs_proc = NULL; 1014 rule->rr_subject.rs_uip = NULL; 1015 rule->rr_subject.rs_loginclass = NULL; 1016 rule->rr_subject.rs_prison_racct = NULL; 1017 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1018 rule->rr_resource = RACCT_UNDEFINED; 1019 rule->rr_action = RCTL_ACTION_UNDEFINED; 1020 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1021 refcount_init(&rule->rr_refcount, 1); 1022 1023 return (rule); 1024 } 1025 1026 struct rctl_rule * 1027 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1028 { 1029 struct rctl_rule *copy; 1030 1031 ASSERT_RACCT_ENABLED(); 1032 1033 copy = uma_zalloc(rctl_rule_zone, flags); 1034 if (copy == NULL) 1035 return (NULL); 1036 copy->rr_subject_type = rule->rr_subject_type; 1037 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1038 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1039 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1040 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1041 copy->rr_per = rule->rr_per; 1042 copy->rr_resource = rule->rr_resource; 1043 copy->rr_action = rule->rr_action; 1044 copy->rr_amount = rule->rr_amount; 1045 refcount_init(©->rr_refcount, 1); 1046 rctl_rule_acquire_subject(copy); 1047 1048 return (copy); 1049 } 1050 1051 void 1052 rctl_rule_acquire(struct rctl_rule *rule) 1053 { 1054 1055 ASSERT_RACCT_ENABLED(); 1056 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1057 1058 refcount_acquire(&rule->rr_refcount); 1059 } 1060 1061 static void 1062 rctl_rule_free(void *context, int pending) 1063 { 1064 struct rctl_rule *rule; 1065 1066 rule = (struct rctl_rule *)context; 1067 1068 ASSERT_RACCT_ENABLED(); 1069 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1070 1071 /* 1072 * We don't need locking here; rule is guaranteed to be inaccessible. 1073 */ 1074 1075 rctl_rule_release_subject(rule); 1076 uma_zfree(rctl_rule_zone, rule); 1077 } 1078 1079 void 1080 rctl_rule_release(struct rctl_rule *rule) 1081 { 1082 1083 ASSERT_RACCT_ENABLED(); 1084 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1085 1086 if (refcount_release(&rule->rr_refcount)) { 1087 /* 1088 * rctl_rule_release() is often called when iterating 1089 * over all the uidinfo structures in the system, 1090 * holding uihashtbl_lock. Since rctl_rule_free() 1091 * might end up calling uifree(), this would lead 1092 * to lock recursion. Use taskqueue to avoid this. 1093 */ 1094 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1095 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1096 } 1097 } 1098 1099 static int 1100 rctl_rule_fully_specified(const struct rctl_rule *rule) 1101 { 1102 1103 ASSERT_RACCT_ENABLED(); 1104 1105 switch (rule->rr_subject_type) { 1106 case RCTL_SUBJECT_TYPE_UNDEFINED: 1107 return (0); 1108 case RCTL_SUBJECT_TYPE_PROCESS: 1109 if (rule->rr_subject.rs_proc == NULL) 1110 return (0); 1111 break; 1112 case RCTL_SUBJECT_TYPE_USER: 1113 if (rule->rr_subject.rs_uip == NULL) 1114 return (0); 1115 break; 1116 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1117 if (rule->rr_subject.rs_loginclass == NULL) 1118 return (0); 1119 break; 1120 case RCTL_SUBJECT_TYPE_JAIL: 1121 if (rule->rr_subject.rs_prison_racct == NULL) 1122 return (0); 1123 break; 1124 default: 1125 panic("rctl_rule_fully_specified: unknown subject type %d", 1126 rule->rr_subject_type); 1127 } 1128 if (rule->rr_resource == RACCT_UNDEFINED) 1129 return (0); 1130 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1131 return (0); 1132 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1133 return (0); 1134 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1135 return (0); 1136 1137 return (1); 1138 } 1139 1140 static int 1141 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1142 { 1143 struct rctl_rule *rule; 1144 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1145 *amountstr, *perstr; 1146 id_t id; 1147 int error = 0; 1148 1149 ASSERT_RACCT_ENABLED(); 1150 1151 rule = rctl_rule_alloc(M_WAITOK); 1152 1153 subjectstr = strsep(&rulestr, ":"); 1154 subject_idstr = strsep(&rulestr, ":"); 1155 resourcestr = strsep(&rulestr, ":"); 1156 actionstr = strsep(&rulestr, "=/"); 1157 amountstr = strsep(&rulestr, "/"); 1158 perstr = rulestr; 1159 1160 if (subjectstr == NULL || subjectstr[0] == '\0') 1161 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1162 else { 1163 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1164 if (error != 0) 1165 goto out; 1166 } 1167 1168 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1169 rule->rr_subject.rs_proc = NULL; 1170 rule->rr_subject.rs_uip = NULL; 1171 rule->rr_subject.rs_loginclass = NULL; 1172 rule->rr_subject.rs_prison_racct = NULL; 1173 } else { 1174 switch (rule->rr_subject_type) { 1175 case RCTL_SUBJECT_TYPE_UNDEFINED: 1176 error = EINVAL; 1177 goto out; 1178 case RCTL_SUBJECT_TYPE_PROCESS: 1179 error = str2id(subject_idstr, &id); 1180 if (error != 0) 1181 goto out; 1182 sx_assert(&allproc_lock, SA_LOCKED); 1183 rule->rr_subject.rs_proc = pfind(id); 1184 if (rule->rr_subject.rs_proc == NULL) { 1185 error = ESRCH; 1186 goto out; 1187 } 1188 PROC_UNLOCK(rule->rr_subject.rs_proc); 1189 break; 1190 case RCTL_SUBJECT_TYPE_USER: 1191 error = str2id(subject_idstr, &id); 1192 if (error != 0) 1193 goto out; 1194 rule->rr_subject.rs_uip = uifind(id); 1195 break; 1196 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1197 rule->rr_subject.rs_loginclass = 1198 loginclass_find(subject_idstr); 1199 if (rule->rr_subject.rs_loginclass == NULL) { 1200 error = ENAMETOOLONG; 1201 goto out; 1202 } 1203 break; 1204 case RCTL_SUBJECT_TYPE_JAIL: 1205 rule->rr_subject.rs_prison_racct = 1206 prison_racct_find(subject_idstr); 1207 if (rule->rr_subject.rs_prison_racct == NULL) { 1208 error = ENAMETOOLONG; 1209 goto out; 1210 } 1211 break; 1212 default: 1213 panic("rctl_string_to_rule: unknown subject type %d", 1214 rule->rr_subject_type); 1215 } 1216 } 1217 1218 if (resourcestr == NULL || resourcestr[0] == '\0') 1219 rule->rr_resource = RACCT_UNDEFINED; 1220 else { 1221 error = str2value(resourcestr, &rule->rr_resource, 1222 resourcenames); 1223 if (error != 0) 1224 goto out; 1225 } 1226 1227 if (actionstr == NULL || actionstr[0] == '\0') 1228 rule->rr_action = RCTL_ACTION_UNDEFINED; 1229 else { 1230 error = str2value(actionstr, &rule->rr_action, actionnames); 1231 if (error != 0) 1232 goto out; 1233 } 1234 1235 if (amountstr == NULL || amountstr[0] == '\0') 1236 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1237 else { 1238 error = str2int64(amountstr, &rule->rr_amount); 1239 if (error != 0) 1240 goto out; 1241 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1242 if (rule->rr_amount > INT64_MAX / 1000000) { 1243 error = ERANGE; 1244 goto out; 1245 } 1246 rule->rr_amount *= 1000000; 1247 } 1248 } 1249 1250 if (perstr == NULL || perstr[0] == '\0') 1251 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1252 else { 1253 error = str2value(perstr, &rule->rr_per, subjectnames); 1254 if (error != 0) 1255 goto out; 1256 } 1257 1258 out: 1259 if (error == 0) 1260 *rulep = rule; 1261 else 1262 rctl_rule_release(rule); 1263 1264 return (error); 1265 } 1266 1267 /* 1268 * Link a rule with all the subjects it applies to. 1269 */ 1270 int 1271 rctl_rule_add(struct rctl_rule *rule) 1272 { 1273 struct proc *p; 1274 struct ucred *cred; 1275 struct uidinfo *uip; 1276 struct prison *pr; 1277 struct prison_racct *prr; 1278 struct loginclass *lc; 1279 struct rctl_rule *rule2; 1280 int match; 1281 1282 ASSERT_RACCT_ENABLED(); 1283 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1284 1285 /* 1286 * Some rules just don't make sense, like "deny" rule for an undeniable 1287 * resource. The exception are the RSS and %CPU resources - they are 1288 * not deniable in the racct sense, but the limit is enforced in 1289 * a different way. 1290 */ 1291 if (rule->rr_action == RCTL_ACTION_DENY && 1292 !RACCT_IS_DENIABLE(rule->rr_resource) && 1293 rule->rr_resource != RACCT_RSS && 1294 rule->rr_resource != RACCT_PCTCPU) { 1295 return (EOPNOTSUPP); 1296 } 1297 1298 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1299 !RACCT_IS_DECAYING(rule->rr_resource)) { 1300 return (EOPNOTSUPP); 1301 } 1302 1303 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1304 rule->rr_resource == RACCT_PCTCPU) { 1305 return (EOPNOTSUPP); 1306 } 1307 1308 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1309 RACCT_IS_SLOPPY(rule->rr_resource)) { 1310 return (EOPNOTSUPP); 1311 } 1312 1313 /* 1314 * Make sure there are no duplicated rules. Also, for the "deny" 1315 * rules, remove ones differing only by "amount". 1316 */ 1317 if (rule->rr_action == RCTL_ACTION_DENY) { 1318 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1319 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1320 rctl_rule_remove(rule2); 1321 rctl_rule_release(rule2); 1322 } else 1323 rctl_rule_remove(rule); 1324 1325 switch (rule->rr_subject_type) { 1326 case RCTL_SUBJECT_TYPE_PROCESS: 1327 p = rule->rr_subject.rs_proc; 1328 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1329 1330 rctl_racct_add_rule(p->p_racct, rule); 1331 /* 1332 * In case of per-process rule, we don't have anything more 1333 * to do. 1334 */ 1335 return (0); 1336 1337 case RCTL_SUBJECT_TYPE_USER: 1338 uip = rule->rr_subject.rs_uip; 1339 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1340 rctl_racct_add_rule(uip->ui_racct, rule); 1341 break; 1342 1343 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1344 lc = rule->rr_subject.rs_loginclass; 1345 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1346 rctl_racct_add_rule(lc->lc_racct, rule); 1347 break; 1348 1349 case RCTL_SUBJECT_TYPE_JAIL: 1350 prr = rule->rr_subject.rs_prison_racct; 1351 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1352 rctl_racct_add_rule(prr->prr_racct, rule); 1353 break; 1354 1355 default: 1356 panic("rctl_rule_add: unknown subject type %d", 1357 rule->rr_subject_type); 1358 } 1359 1360 /* 1361 * Now go through all the processes and add the new rule to the ones 1362 * it applies to. 1363 */ 1364 sx_assert(&allproc_lock, SA_LOCKED); 1365 FOREACH_PROC_IN_SYSTEM(p) { 1366 cred = p->p_ucred; 1367 switch (rule->rr_subject_type) { 1368 case RCTL_SUBJECT_TYPE_USER: 1369 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1370 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1371 break; 1372 continue; 1373 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1374 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1375 break; 1376 continue; 1377 case RCTL_SUBJECT_TYPE_JAIL: 1378 match = 0; 1379 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1380 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1381 match = 1; 1382 break; 1383 } 1384 } 1385 if (match) 1386 break; 1387 continue; 1388 default: 1389 panic("rctl_rule_add: unknown subject type %d", 1390 rule->rr_subject_type); 1391 } 1392 1393 rctl_racct_add_rule(p->p_racct, rule); 1394 } 1395 1396 return (0); 1397 } 1398 1399 static void 1400 rctl_rule_pre_callback(void) 1401 { 1402 1403 RACCT_LOCK(); 1404 } 1405 1406 static void 1407 rctl_rule_post_callback(void) 1408 { 1409 1410 RACCT_UNLOCK(); 1411 } 1412 1413 static void 1414 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) 1415 { 1416 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1417 int found = 0; 1418 1419 ASSERT_RACCT_ENABLED(); 1420 RACCT_LOCK_ASSERT(); 1421 1422 found += rctl_racct_remove_rules(racct, filter); 1423 1424 *((int *)arg3) += found; 1425 } 1426 1427 /* 1428 * Remove all rules that match the filter. 1429 */ 1430 int 1431 rctl_rule_remove(struct rctl_rule *filter) 1432 { 1433 struct proc *p; 1434 int found = 0; 1435 1436 ASSERT_RACCT_ENABLED(); 1437 1438 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1439 filter->rr_subject.rs_proc != NULL) { 1440 p = filter->rr_subject.rs_proc; 1441 RACCT_LOCK(); 1442 found = rctl_racct_remove_rules(p->p_racct, filter); 1443 RACCT_UNLOCK(); 1444 if (found) 1445 return (0); 1446 return (ESRCH); 1447 } 1448 1449 loginclass_racct_foreach(rctl_rule_remove_callback, 1450 rctl_rule_pre_callback, rctl_rule_post_callback, 1451 filter, (void *)&found); 1452 ui_racct_foreach(rctl_rule_remove_callback, 1453 rctl_rule_pre_callback, rctl_rule_post_callback, 1454 filter, (void *)&found); 1455 prison_racct_foreach(rctl_rule_remove_callback, 1456 rctl_rule_pre_callback, rctl_rule_post_callback, 1457 filter, (void *)&found); 1458 1459 sx_assert(&allproc_lock, SA_LOCKED); 1460 RACCT_LOCK(); 1461 FOREACH_PROC_IN_SYSTEM(p) { 1462 found += rctl_racct_remove_rules(p->p_racct, filter); 1463 } 1464 RACCT_UNLOCK(); 1465 1466 if (found) 1467 return (0); 1468 return (ESRCH); 1469 } 1470 1471 /* 1472 * Appends a rule to the sbuf. 1473 */ 1474 static void 1475 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1476 { 1477 int64_t amount; 1478 1479 ASSERT_RACCT_ENABLED(); 1480 1481 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1482 1483 switch (rule->rr_subject_type) { 1484 case RCTL_SUBJECT_TYPE_PROCESS: 1485 if (rule->rr_subject.rs_proc == NULL) 1486 sbuf_printf(sb, ":"); 1487 else 1488 sbuf_printf(sb, "%d:", 1489 rule->rr_subject.rs_proc->p_pid); 1490 break; 1491 case RCTL_SUBJECT_TYPE_USER: 1492 if (rule->rr_subject.rs_uip == NULL) 1493 sbuf_printf(sb, ":"); 1494 else 1495 sbuf_printf(sb, "%d:", 1496 rule->rr_subject.rs_uip->ui_uid); 1497 break; 1498 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1499 if (rule->rr_subject.rs_loginclass == NULL) 1500 sbuf_printf(sb, ":"); 1501 else 1502 sbuf_printf(sb, "%s:", 1503 rule->rr_subject.rs_loginclass->lc_name); 1504 break; 1505 case RCTL_SUBJECT_TYPE_JAIL: 1506 if (rule->rr_subject.rs_prison_racct == NULL) 1507 sbuf_printf(sb, ":"); 1508 else 1509 sbuf_printf(sb, "%s:", 1510 rule->rr_subject.rs_prison_racct->prr_name); 1511 break; 1512 default: 1513 panic("rctl_rule_to_sbuf: unknown subject type %d", 1514 rule->rr_subject_type); 1515 } 1516 1517 amount = rule->rr_amount; 1518 if (amount != RCTL_AMOUNT_UNDEFINED && 1519 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1520 amount /= 1000000; 1521 1522 sbuf_printf(sb, "%s:%s=%jd", 1523 rctl_resource_name(rule->rr_resource), 1524 rctl_action_name(rule->rr_action), 1525 amount); 1526 1527 if (rule->rr_per != rule->rr_subject_type) 1528 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1529 } 1530 1531 /* 1532 * Routine used by RCTL syscalls to read in input string. 1533 */ 1534 static int 1535 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1536 { 1537 char *str; 1538 int error; 1539 1540 ASSERT_RACCT_ENABLED(); 1541 1542 if (inbuflen <= 0) 1543 return (EINVAL); 1544 if (inbuflen > RCTL_MAX_INBUFSIZE) 1545 return (E2BIG); 1546 1547 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1548 error = copyinstr(inbufp, str, inbuflen, NULL); 1549 if (error != 0) { 1550 free(str, M_RCTL); 1551 return (error); 1552 } 1553 1554 *inputstr = str; 1555 1556 return (0); 1557 } 1558 1559 /* 1560 * Routine used by RCTL syscalls to write out output string. 1561 */ 1562 static int 1563 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1564 { 1565 int error; 1566 1567 ASSERT_RACCT_ENABLED(); 1568 1569 if (outputsbuf == NULL) 1570 return (0); 1571 1572 sbuf_finish(outputsbuf); 1573 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1574 sbuf_delete(outputsbuf); 1575 return (ERANGE); 1576 } 1577 error = copyout(sbuf_data(outputsbuf), outbufp, 1578 sbuf_len(outputsbuf) + 1); 1579 sbuf_delete(outputsbuf); 1580 return (error); 1581 } 1582 1583 static struct sbuf * 1584 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1585 { 1586 struct sbuf *sb; 1587 int64_t amount; 1588 int i; 1589 1590 ASSERT_RACCT_ENABLED(); 1591 1592 sb = sbuf_new_auto(); 1593 for (i = 0; i <= RACCT_MAX; i++) { 1594 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1595 continue; 1596 RACCT_LOCK(); 1597 amount = racct->r_resources[i]; 1598 RACCT_UNLOCK(); 1599 if (RACCT_IS_IN_MILLIONS(i)) 1600 amount /= 1000000; 1601 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1602 } 1603 sbuf_setpos(sb, sbuf_len(sb) - 1); 1604 return (sb); 1605 } 1606 1607 int 1608 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1609 { 1610 struct rctl_rule *filter; 1611 struct sbuf *outputsbuf = NULL; 1612 struct proc *p; 1613 struct uidinfo *uip; 1614 struct loginclass *lc; 1615 struct prison_racct *prr; 1616 char *inputstr; 1617 int error; 1618 1619 if (!racct_enable) 1620 return (ENOSYS); 1621 1622 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1623 if (error != 0) 1624 return (error); 1625 1626 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1627 if (error != 0) 1628 return (error); 1629 1630 sx_slock(&allproc_lock); 1631 error = rctl_string_to_rule(inputstr, &filter); 1632 free(inputstr, M_RCTL); 1633 if (error != 0) { 1634 sx_sunlock(&allproc_lock); 1635 return (error); 1636 } 1637 1638 switch (filter->rr_subject_type) { 1639 case RCTL_SUBJECT_TYPE_PROCESS: 1640 p = filter->rr_subject.rs_proc; 1641 if (p == NULL) { 1642 error = EINVAL; 1643 goto out; 1644 } 1645 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1646 break; 1647 case RCTL_SUBJECT_TYPE_USER: 1648 uip = filter->rr_subject.rs_uip; 1649 if (uip == NULL) { 1650 error = EINVAL; 1651 goto out; 1652 } 1653 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1654 break; 1655 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1656 lc = filter->rr_subject.rs_loginclass; 1657 if (lc == NULL) { 1658 error = EINVAL; 1659 goto out; 1660 } 1661 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1662 break; 1663 case RCTL_SUBJECT_TYPE_JAIL: 1664 prr = filter->rr_subject.rs_prison_racct; 1665 if (prr == NULL) { 1666 error = EINVAL; 1667 goto out; 1668 } 1669 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1670 break; 1671 default: 1672 error = EINVAL; 1673 } 1674 out: 1675 rctl_rule_release(filter); 1676 sx_sunlock(&allproc_lock); 1677 if (error != 0) 1678 return (error); 1679 1680 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1681 1682 return (error); 1683 } 1684 1685 static void 1686 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1687 { 1688 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1689 struct rctl_rule_link *link; 1690 struct sbuf *sb = (struct sbuf *)arg3; 1691 1692 ASSERT_RACCT_ENABLED(); 1693 RACCT_LOCK_ASSERT(); 1694 1695 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1696 if (!rctl_rule_matches(link->rrl_rule, filter)) 1697 continue; 1698 rctl_rule_to_sbuf(sb, link->rrl_rule); 1699 sbuf_printf(sb, ","); 1700 } 1701 } 1702 1703 int 1704 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1705 { 1706 struct sbuf *sb; 1707 struct rctl_rule *filter; 1708 struct rctl_rule_link *link; 1709 struct proc *p; 1710 char *inputstr, *buf; 1711 size_t bufsize; 1712 int error; 1713 1714 if (!racct_enable) 1715 return (ENOSYS); 1716 1717 error = priv_check(td, PRIV_RCTL_GET_RULES); 1718 if (error != 0) 1719 return (error); 1720 1721 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1722 if (error != 0) 1723 return (error); 1724 1725 sx_slock(&allproc_lock); 1726 error = rctl_string_to_rule(inputstr, &filter); 1727 free(inputstr, M_RCTL); 1728 if (error != 0) { 1729 sx_sunlock(&allproc_lock); 1730 return (error); 1731 } 1732 1733 bufsize = uap->outbuflen; 1734 if (bufsize > rctl_maxbufsize) { 1735 sx_sunlock(&allproc_lock); 1736 return (E2BIG); 1737 } 1738 1739 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1740 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1741 KASSERT(sb != NULL, ("sbuf_new failed")); 1742 1743 FOREACH_PROC_IN_SYSTEM(p) { 1744 RACCT_LOCK(); 1745 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1746 /* 1747 * Non-process rules will be added to the buffer later. 1748 * Adding them here would result in duplicated output. 1749 */ 1750 if (link->rrl_rule->rr_subject_type != 1751 RCTL_SUBJECT_TYPE_PROCESS) 1752 continue; 1753 if (!rctl_rule_matches(link->rrl_rule, filter)) 1754 continue; 1755 rctl_rule_to_sbuf(sb, link->rrl_rule); 1756 sbuf_printf(sb, ","); 1757 } 1758 RACCT_UNLOCK(); 1759 } 1760 1761 loginclass_racct_foreach(rctl_get_rules_callback, 1762 rctl_rule_pre_callback, rctl_rule_post_callback, 1763 filter, sb); 1764 ui_racct_foreach(rctl_get_rules_callback, 1765 rctl_rule_pre_callback, rctl_rule_post_callback, 1766 filter, sb); 1767 prison_racct_foreach(rctl_get_rules_callback, 1768 rctl_rule_pre_callback, rctl_rule_post_callback, 1769 filter, sb); 1770 if (sbuf_error(sb) == ENOMEM) { 1771 error = ERANGE; 1772 goto out; 1773 } 1774 1775 /* 1776 * Remove trailing ",". 1777 */ 1778 if (sbuf_len(sb) > 0) 1779 sbuf_setpos(sb, sbuf_len(sb) - 1); 1780 1781 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1782 out: 1783 rctl_rule_release(filter); 1784 sx_sunlock(&allproc_lock); 1785 free(buf, M_RCTL); 1786 return (error); 1787 } 1788 1789 int 1790 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1791 { 1792 struct sbuf *sb; 1793 struct rctl_rule *filter; 1794 struct rctl_rule_link *link; 1795 char *inputstr, *buf; 1796 size_t bufsize; 1797 int error; 1798 1799 if (!racct_enable) 1800 return (ENOSYS); 1801 1802 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1803 if (error != 0) 1804 return (error); 1805 1806 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1807 if (error != 0) 1808 return (error); 1809 1810 sx_slock(&allproc_lock); 1811 error = rctl_string_to_rule(inputstr, &filter); 1812 free(inputstr, M_RCTL); 1813 if (error != 0) { 1814 sx_sunlock(&allproc_lock); 1815 return (error); 1816 } 1817 1818 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1819 rctl_rule_release(filter); 1820 sx_sunlock(&allproc_lock); 1821 return (EINVAL); 1822 } 1823 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1824 rctl_rule_release(filter); 1825 sx_sunlock(&allproc_lock); 1826 return (EOPNOTSUPP); 1827 } 1828 if (filter->rr_subject.rs_proc == NULL) { 1829 rctl_rule_release(filter); 1830 sx_sunlock(&allproc_lock); 1831 return (EINVAL); 1832 } 1833 1834 bufsize = uap->outbuflen; 1835 if (bufsize > rctl_maxbufsize) { 1836 rctl_rule_release(filter); 1837 sx_sunlock(&allproc_lock); 1838 return (E2BIG); 1839 } 1840 1841 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1842 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1843 KASSERT(sb != NULL, ("sbuf_new failed")); 1844 1845 RACCT_LOCK(); 1846 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1847 rrl_next) { 1848 rctl_rule_to_sbuf(sb, link->rrl_rule); 1849 sbuf_printf(sb, ","); 1850 } 1851 RACCT_UNLOCK(); 1852 if (sbuf_error(sb) == ENOMEM) { 1853 error = ERANGE; 1854 sbuf_delete(sb); 1855 goto out; 1856 } 1857 1858 /* 1859 * Remove trailing ",". 1860 */ 1861 if (sbuf_len(sb) > 0) 1862 sbuf_setpos(sb, sbuf_len(sb) - 1); 1863 1864 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1865 out: 1866 rctl_rule_release(filter); 1867 sx_sunlock(&allproc_lock); 1868 free(buf, M_RCTL); 1869 return (error); 1870 } 1871 1872 int 1873 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1874 { 1875 struct rctl_rule *rule; 1876 char *inputstr; 1877 int error; 1878 1879 if (!racct_enable) 1880 return (ENOSYS); 1881 1882 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1883 if (error != 0) 1884 return (error); 1885 1886 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1887 if (error != 0) 1888 return (error); 1889 1890 sx_slock(&allproc_lock); 1891 error = rctl_string_to_rule(inputstr, &rule); 1892 free(inputstr, M_RCTL); 1893 if (error != 0) { 1894 sx_sunlock(&allproc_lock); 1895 return (error); 1896 } 1897 /* 1898 * The 'per' part of a rule is optional. 1899 */ 1900 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1901 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1902 rule->rr_per = rule->rr_subject_type; 1903 1904 if (!rctl_rule_fully_specified(rule)) { 1905 error = EINVAL; 1906 goto out; 1907 } 1908 1909 error = rctl_rule_add(rule); 1910 1911 out: 1912 rctl_rule_release(rule); 1913 sx_sunlock(&allproc_lock); 1914 return (error); 1915 } 1916 1917 int 1918 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1919 { 1920 struct rctl_rule *filter; 1921 char *inputstr; 1922 int error; 1923 1924 if (!racct_enable) 1925 return (ENOSYS); 1926 1927 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1928 if (error != 0) 1929 return (error); 1930 1931 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1932 if (error != 0) 1933 return (error); 1934 1935 sx_slock(&allproc_lock); 1936 error = rctl_string_to_rule(inputstr, &filter); 1937 free(inputstr, M_RCTL); 1938 if (error != 0) { 1939 sx_sunlock(&allproc_lock); 1940 return (error); 1941 } 1942 1943 error = rctl_rule_remove(filter); 1944 rctl_rule_release(filter); 1945 sx_sunlock(&allproc_lock); 1946 1947 return (error); 1948 } 1949 1950 /* 1951 * Update RCTL rule list after credential change. 1952 */ 1953 void 1954 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1955 { 1956 LIST_HEAD(, rctl_rule_link) newrules; 1957 struct rctl_rule_link *link, *newlink; 1958 struct uidinfo *newuip; 1959 struct loginclass *newlc; 1960 struct prison_racct *newprr; 1961 int rulecnt, i; 1962 1963 if (!racct_enable) 1964 return; 1965 1966 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1967 1968 newuip = newcred->cr_ruidinfo; 1969 newlc = newcred->cr_loginclass; 1970 newprr = newcred->cr_prison->pr_prison_racct; 1971 1972 LIST_INIT(&newrules); 1973 1974 again: 1975 /* 1976 * First, count the rules that apply to the process with new 1977 * credentials. 1978 */ 1979 rulecnt = 0; 1980 RACCT_LOCK(); 1981 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1982 if (link->rrl_rule->rr_subject_type == 1983 RCTL_SUBJECT_TYPE_PROCESS) 1984 rulecnt++; 1985 } 1986 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1987 rulecnt++; 1988 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1989 rulecnt++; 1990 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1991 rulecnt++; 1992 RACCT_UNLOCK(); 1993 1994 /* 1995 * Create temporary list. We've dropped the rctl_lock in order 1996 * to use M_WAITOK. 1997 */ 1998 for (i = 0; i < rulecnt; i++) { 1999 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2000 newlink->rrl_rule = NULL; 2001 newlink->rrl_exceeded = 0; 2002 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2003 } 2004 2005 newlink = LIST_FIRST(&newrules); 2006 2007 /* 2008 * Assign rules to the newly allocated list entries. 2009 */ 2010 RACCT_LOCK(); 2011 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2012 if (link->rrl_rule->rr_subject_type == 2013 RCTL_SUBJECT_TYPE_PROCESS) { 2014 if (newlink == NULL) 2015 goto goaround; 2016 rctl_rule_acquire(link->rrl_rule); 2017 newlink->rrl_rule = link->rrl_rule; 2018 newlink->rrl_exceeded = link->rrl_exceeded; 2019 newlink = LIST_NEXT(newlink, rrl_next); 2020 rulecnt--; 2021 } 2022 } 2023 2024 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2025 if (newlink == NULL) 2026 goto goaround; 2027 rctl_rule_acquire(link->rrl_rule); 2028 newlink->rrl_rule = link->rrl_rule; 2029 newlink->rrl_exceeded = link->rrl_exceeded; 2030 newlink = LIST_NEXT(newlink, rrl_next); 2031 rulecnt--; 2032 } 2033 2034 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2035 if (newlink == NULL) 2036 goto goaround; 2037 rctl_rule_acquire(link->rrl_rule); 2038 newlink->rrl_rule = link->rrl_rule; 2039 newlink->rrl_exceeded = link->rrl_exceeded; 2040 newlink = LIST_NEXT(newlink, rrl_next); 2041 rulecnt--; 2042 } 2043 2044 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2045 if (newlink == NULL) 2046 goto goaround; 2047 rctl_rule_acquire(link->rrl_rule); 2048 newlink->rrl_rule = link->rrl_rule; 2049 newlink->rrl_exceeded = link->rrl_exceeded; 2050 newlink = LIST_NEXT(newlink, rrl_next); 2051 rulecnt--; 2052 } 2053 2054 if (rulecnt == 0) { 2055 /* 2056 * Free the old rule list. 2057 */ 2058 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2059 link = LIST_FIRST(&p->p_racct->r_rule_links); 2060 LIST_REMOVE(link, rrl_next); 2061 rctl_rule_release(link->rrl_rule); 2062 uma_zfree(rctl_rule_link_zone, link); 2063 } 2064 2065 /* 2066 * Replace lists and we're done. 2067 * 2068 * XXX: Is there any way to switch list heads instead 2069 * of iterating here? 2070 */ 2071 while (!LIST_EMPTY(&newrules)) { 2072 newlink = LIST_FIRST(&newrules); 2073 LIST_REMOVE(newlink, rrl_next); 2074 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2075 newlink, rrl_next); 2076 } 2077 2078 RACCT_UNLOCK(); 2079 2080 return; 2081 } 2082 2083 goaround: 2084 RACCT_UNLOCK(); 2085 2086 /* 2087 * Rule list changed while we were not holding the rctl_lock. 2088 * Free the new list and try again. 2089 */ 2090 while (!LIST_EMPTY(&newrules)) { 2091 newlink = LIST_FIRST(&newrules); 2092 LIST_REMOVE(newlink, rrl_next); 2093 if (newlink->rrl_rule != NULL) 2094 rctl_rule_release(newlink->rrl_rule); 2095 uma_zfree(rctl_rule_link_zone, newlink); 2096 } 2097 2098 goto again; 2099 } 2100 2101 /* 2102 * Assign RCTL rules to the newly created process. 2103 */ 2104 int 2105 rctl_proc_fork(struct proc *parent, struct proc *child) 2106 { 2107 struct rctl_rule *rule; 2108 struct rctl_rule_link *link; 2109 int error; 2110 2111 ASSERT_RACCT_ENABLED(); 2112 RACCT_LOCK_ASSERT(); 2113 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2114 2115 LIST_INIT(&child->p_racct->r_rule_links); 2116 2117 /* 2118 * Go through limits applicable to the parent and assign them 2119 * to the child. Rules with 'process' subject have to be duplicated 2120 * in order to make their rr_subject point to the new process. 2121 */ 2122 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2123 if (link->rrl_rule->rr_subject_type == 2124 RCTL_SUBJECT_TYPE_PROCESS) { 2125 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2126 if (rule == NULL) 2127 goto fail; 2128 KASSERT(rule->rr_subject.rs_proc == parent, 2129 ("rule->rr_subject.rs_proc != parent")); 2130 rule->rr_subject.rs_proc = child; 2131 error = rctl_racct_add_rule_locked(child->p_racct, 2132 rule); 2133 rctl_rule_release(rule); 2134 if (error != 0) 2135 goto fail; 2136 } else { 2137 error = rctl_racct_add_rule_locked(child->p_racct, 2138 link->rrl_rule); 2139 if (error != 0) 2140 goto fail; 2141 } 2142 } 2143 2144 return (0); 2145 2146 fail: 2147 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2148 link = LIST_FIRST(&child->p_racct->r_rule_links); 2149 LIST_REMOVE(link, rrl_next); 2150 rctl_rule_release(link->rrl_rule); 2151 uma_zfree(rctl_rule_link_zone, link); 2152 } 2153 2154 return (EAGAIN); 2155 } 2156 2157 /* 2158 * Release rules attached to the racct. 2159 */ 2160 void 2161 rctl_racct_release(struct racct *racct) 2162 { 2163 struct rctl_rule_link *link; 2164 2165 ASSERT_RACCT_ENABLED(); 2166 RACCT_LOCK_ASSERT(); 2167 2168 while (!LIST_EMPTY(&racct->r_rule_links)) { 2169 link = LIST_FIRST(&racct->r_rule_links); 2170 LIST_REMOVE(link, rrl_next); 2171 rctl_rule_release(link->rrl_rule); 2172 uma_zfree(rctl_rule_link_zone, link); 2173 } 2174 } 2175 2176 static void 2177 rctl_init(void) 2178 { 2179 2180 if (!racct_enable) 2181 return; 2182 2183 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2184 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2185 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2186 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2187 UMA_ALIGN_PTR, 0); 2188 2189 /* 2190 * Set default values, making sure not to overwrite the ones 2191 * fetched from tunables. Most of those could be set at the 2192 * declaration, except for the rctl_throttle_max - we cannot 2193 * set it there due to hz not being compile time constant. 2194 */ 2195 if (rctl_throttle_min < 1) 2196 rctl_throttle_min = 1; 2197 if (rctl_throttle_max < rctl_throttle_min) 2198 rctl_throttle_max = 2 * hz; 2199 if (rctl_throttle_pct < 0) 2200 rctl_throttle_pct = 100; 2201 if (rctl_throttle_pct2 < 0) 2202 rctl_throttle_pct2 = 100; 2203 } 2204 2205 #else /* !RCTL */ 2206 2207 int 2208 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2209 { 2210 2211 return (ENOSYS); 2212 } 2213 2214 int 2215 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2216 { 2217 2218 return (ENOSYS); 2219 } 2220 2221 int 2222 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2223 { 2224 2225 return (ENOSYS); 2226 } 2227 2228 int 2229 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2230 { 2231 2232 return (ENOSYS); 2233 } 2234 2235 int 2236 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2237 { 2238 2239 return (ENOSYS); 2240 } 2241 2242 #endif /* !RCTL */ 2243