1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 * 30 * $FreeBSD$ 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <sys/param.h> 37 #include <sys/devctl.h> 38 #include <sys/malloc.h> 39 #include <sys/queue.h> 40 #include <sys/refcount.h> 41 #include <sys/jail.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/loginclass.h> 45 #include <sys/priv.h> 46 #include <sys/proc.h> 47 #include <sys/racct.h> 48 #include <sys/rctl.h> 49 #include <sys/resourcevar.h> 50 #include <sys/sx.h> 51 #include <sys/sysent.h> 52 #include <sys/sysproto.h> 53 #include <sys/systm.h> 54 #include <sys/types.h> 55 #include <sys/eventhandler.h> 56 #include <sys/lock.h> 57 #include <sys/mutex.h> 58 #include <sys/rwlock.h> 59 #include <sys/sbuf.h> 60 #include <sys/taskqueue.h> 61 #include <sys/tree.h> 62 #include <vm/uma.h> 63 64 #ifdef RCTL 65 #ifndef RACCT 66 #error "The RCTL option requires the RACCT option" 67 #endif 68 69 FEATURE(rctl, "Resource Limits"); 70 71 #define HRF_DEFAULT 0 72 #define HRF_DONT_INHERIT 1 73 #define HRF_DONT_ACCUMULATE 2 74 75 #define RCTL_MAX_INBUFSIZE 4 * 1024 76 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 77 #define RCTL_LOG_BUFSIZE 128 78 79 #define RCTL_PCPU_SHIFT (10 * 1000000) 80 81 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 82 static int rctl_log_rate_limit = 10; 83 static int rctl_devctl_rate_limit = 10; 84 85 /* 86 * Values below are initialized in rctl_init(). 
87 */ 88 static int rctl_throttle_min = -1; 89 static int rctl_throttle_max = -1; 90 static int rctl_throttle_pct = -1; 91 static int rctl_throttle_pct2 = -1; 92 93 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 94 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 95 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 96 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 97 98 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 99 "Resource Limits"); 100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 101 &rctl_maxbufsize, 0, "Maximum output buffer size"); 102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 103 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 105 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 107 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 108 &rctl_throttle_min_sysctl, "IU", 109 "Shortest throttling duration, in hz"); 110 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 111 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 112 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 113 &rctl_throttle_max_sysctl, "IU", 114 "Longest throttling duration, in hz"); 115 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 117 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 118 &rctl_throttle_pct_sysctl, "IU", 119 "Throttling penalty for process consumption, in percent"); 120 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 121 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 122 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 123 &rctl_throttle_pct2_sysctl, "IU", 124 "Throttling penalty for container consumption, in percent"); 125 
TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 126 127 /* 128 * 'rctl_rule_link' connects a rule with every racct it's related to. 129 * For example, rule 'user:X:openfiles:deny=N/process' is linked 130 * with uidinfo for user X, and to each process of that user. 131 */ 132 struct rctl_rule_link { 133 LIST_ENTRY(rctl_rule_link) rrl_next; 134 struct rctl_rule *rrl_rule; 135 int rrl_exceeded; 136 }; 137 138 struct dict { 139 const char *d_name; 140 int d_value; 141 }; 142 143 static struct dict subjectnames[] = { 144 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 145 { "user", RCTL_SUBJECT_TYPE_USER }, 146 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 147 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 148 { NULL, -1 }}; 149 150 static struct dict resourcenames[] = { 151 { "cputime", RACCT_CPU }, 152 { "datasize", RACCT_DATA }, 153 { "stacksize", RACCT_STACK }, 154 { "coredumpsize", RACCT_CORE }, 155 { "memoryuse", RACCT_RSS }, 156 { "memorylocked", RACCT_MEMLOCK }, 157 { "maxproc", RACCT_NPROC }, 158 { "openfiles", RACCT_NOFILE }, 159 { "vmemoryuse", RACCT_VMEM }, 160 { "pseudoterminals", RACCT_NPTS }, 161 { "swapuse", RACCT_SWAP }, 162 { "nthr", RACCT_NTHR }, 163 { "msgqqueued", RACCT_MSGQQUEUED }, 164 { "msgqsize", RACCT_MSGQSIZE }, 165 { "nmsgq", RACCT_NMSGQ }, 166 { "nsem", RACCT_NSEM }, 167 { "nsemop", RACCT_NSEMOP }, 168 { "nshm", RACCT_NSHM }, 169 { "shmsize", RACCT_SHMSIZE }, 170 { "wallclock", RACCT_WALLCLOCK }, 171 { "pcpu", RACCT_PCTCPU }, 172 { "readbps", RACCT_READBPS }, 173 { "writebps", RACCT_WRITEBPS }, 174 { "readiops", RACCT_READIOPS }, 175 { "writeiops", RACCT_WRITEIOPS }, 176 { NULL, -1 }}; 177 178 static struct dict actionnames[] = { 179 { "sighup", RCTL_ACTION_SIGHUP }, 180 { "sigint", RCTL_ACTION_SIGINT }, 181 { "sigquit", RCTL_ACTION_SIGQUIT }, 182 { "sigill", RCTL_ACTION_SIGILL }, 183 { "sigtrap", RCTL_ACTION_SIGTRAP }, 184 { "sigabrt", RCTL_ACTION_SIGABRT }, 185 { "sigemt", RCTL_ACTION_SIGEMT }, 186 { "sigfpe", RCTL_ACTION_SIGFPE }, 
187 { "sigkill", RCTL_ACTION_SIGKILL }, 188 { "sigbus", RCTL_ACTION_SIGBUS }, 189 { "sigsegv", RCTL_ACTION_SIGSEGV }, 190 { "sigsys", RCTL_ACTION_SIGSYS }, 191 { "sigpipe", RCTL_ACTION_SIGPIPE }, 192 { "sigalrm", RCTL_ACTION_SIGALRM }, 193 { "sigterm", RCTL_ACTION_SIGTERM }, 194 { "sigurg", RCTL_ACTION_SIGURG }, 195 { "sigstop", RCTL_ACTION_SIGSTOP }, 196 { "sigtstp", RCTL_ACTION_SIGTSTP }, 197 { "sigchld", RCTL_ACTION_SIGCHLD }, 198 { "sigttin", RCTL_ACTION_SIGTTIN }, 199 { "sigttou", RCTL_ACTION_SIGTTOU }, 200 { "sigio", RCTL_ACTION_SIGIO }, 201 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 202 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 203 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 204 { "sigprof", RCTL_ACTION_SIGPROF }, 205 { "sigwinch", RCTL_ACTION_SIGWINCH }, 206 { "siginfo", RCTL_ACTION_SIGINFO }, 207 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 208 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 209 { "sigthr", RCTL_ACTION_SIGTHR }, 210 { "deny", RCTL_ACTION_DENY }, 211 { "log", RCTL_ACTION_LOG }, 212 { "devctl", RCTL_ACTION_DEVCTL }, 213 { "throttle", RCTL_ACTION_THROTTLE }, 214 { NULL, -1 }}; 215 216 static void rctl_init(void); 217 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 218 219 static uma_zone_t rctl_rule_zone; 220 static uma_zone_t rctl_rule_link_zone; 221 222 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 223 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 224 225 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 226 227 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 228 { 229 int error, val = rctl_throttle_min; 230 231 error = sysctl_handle_int(oidp, &val, 0, req); 232 if (error || !req->newptr) 233 return (error); 234 if (val < 1 || val > rctl_throttle_max) 235 return (EINVAL); 236 237 RACCT_LOCK(); 238 rctl_throttle_min = val; 239 RACCT_UNLOCK(); 240 241 return (0); 242 } 243 244 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 245 { 246 int error, val = rctl_throttle_max; 247 248 error = 
sysctl_handle_int(oidp, &val, 0, req); 249 if (error || !req->newptr) 250 return (error); 251 if (val < rctl_throttle_min) 252 return (EINVAL); 253 254 RACCT_LOCK(); 255 rctl_throttle_max = val; 256 RACCT_UNLOCK(); 257 258 return (0); 259 } 260 261 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 262 { 263 int error, val = rctl_throttle_pct; 264 265 error = sysctl_handle_int(oidp, &val, 0, req); 266 if (error || !req->newptr) 267 return (error); 268 if (val < 0) 269 return (EINVAL); 270 271 RACCT_LOCK(); 272 rctl_throttle_pct = val; 273 RACCT_UNLOCK(); 274 275 return (0); 276 } 277 278 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 279 { 280 int error, val = rctl_throttle_pct2; 281 282 error = sysctl_handle_int(oidp, &val, 0, req); 283 if (error || !req->newptr) 284 return (error); 285 if (val < 0) 286 return (EINVAL); 287 288 RACCT_LOCK(); 289 rctl_throttle_pct2 = val; 290 RACCT_UNLOCK(); 291 292 return (0); 293 } 294 295 static const char * 296 rctl_subject_type_name(int subject) 297 { 298 int i; 299 300 for (i = 0; subjectnames[i].d_name != NULL; i++) { 301 if (subjectnames[i].d_value == subject) 302 return (subjectnames[i].d_name); 303 } 304 305 panic("rctl_subject_type_name: unknown subject type %d", subject); 306 } 307 308 static const char * 309 rctl_action_name(int action) 310 { 311 int i; 312 313 for (i = 0; actionnames[i].d_name != NULL; i++) { 314 if (actionnames[i].d_value == action) 315 return (actionnames[i].d_name); 316 } 317 318 panic("rctl_action_name: unknown action %d", action); 319 } 320 321 const char * 322 rctl_resource_name(int resource) 323 { 324 int i; 325 326 for (i = 0; resourcenames[i].d_name != NULL; i++) { 327 if (resourcenames[i].d_value == resource) 328 return (resourcenames[i].d_name); 329 } 330 331 panic("rctl_resource_name: unknown resource %d", resource); 332 } 333 334 static struct racct * 335 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 336 { 337 struct ucred *cred = p->p_ucred; 
338 339 ASSERT_RACCT_ENABLED(); 340 RACCT_LOCK_ASSERT(); 341 342 switch (rule->rr_per) { 343 case RCTL_SUBJECT_TYPE_PROCESS: 344 return (p->p_racct); 345 case RCTL_SUBJECT_TYPE_USER: 346 return (cred->cr_ruidinfo->ui_racct); 347 case RCTL_SUBJECT_TYPE_LOGINCLASS: 348 return (cred->cr_loginclass->lc_racct); 349 case RCTL_SUBJECT_TYPE_JAIL: 350 return (cred->cr_prison->pr_prison_racct->prr_racct); 351 default: 352 panic("%s: unknown per %d", __func__, rule->rr_per); 353 } 354 } 355 356 /* 357 * Return the amount of resource that can be allocated by 'p' before 358 * hitting 'rule'. 359 */ 360 static int64_t 361 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 362 { 363 const struct racct *racct; 364 int64_t available; 365 366 ASSERT_RACCT_ENABLED(); 367 RACCT_LOCK_ASSERT(); 368 369 racct = rctl_proc_rule_to_racct(p, rule); 370 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 371 372 return (available); 373 } 374 375 /* 376 * Called every second for proc, uidinfo, loginclass, and jail containers. 377 * If the limit isn't exceeded, it decreases the usage amount to zero. 378 * Otherwise, it decreases it by the value of the limit. This way 379 * resource consumption exceeding the limit "carries over" to the next 380 * period. 
381 */ 382 void 383 rctl_throttle_decay(struct racct *racct, int resource) 384 { 385 struct rctl_rule *rule; 386 struct rctl_rule_link *link; 387 int64_t minavailable; 388 389 ASSERT_RACCT_ENABLED(); 390 RACCT_LOCK_ASSERT(); 391 392 minavailable = INT64_MAX; 393 394 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 395 rule = link->rrl_rule; 396 397 if (rule->rr_resource != resource) 398 continue; 399 if (rule->rr_action != RCTL_ACTION_THROTTLE) 400 continue; 401 402 if (rule->rr_amount < minavailable) 403 minavailable = rule->rr_amount; 404 } 405 406 if (racct->r_resources[resource] < minavailable) { 407 racct->r_resources[resource] = 0; 408 } else { 409 /* 410 * Cap utilization counter at ten times the limit. Otherwise, 411 * if we changed the rule lowering the allowed amount, it could 412 * take unreasonably long time for the accumulated resource 413 * usage to drop. 414 */ 415 if (racct->r_resources[resource] > minavailable * 10) 416 racct->r_resources[resource] = minavailable * 10; 417 418 racct->r_resources[resource] -= minavailable; 419 } 420 } 421 422 /* 423 * Special version of rctl_get_available() for the %CPU resource. 424 * We slightly cheat here and return less than we normally would. 425 */ 426 int64_t 427 rctl_pcpu_available(const struct proc *p) { 428 struct rctl_rule *rule; 429 struct rctl_rule_link *link; 430 int64_t available, minavailable, limit; 431 432 ASSERT_RACCT_ENABLED(); 433 RACCT_LOCK_ASSERT(); 434 435 minavailable = INT64_MAX; 436 limit = 0; 437 438 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 439 rule = link->rrl_rule; 440 if (rule->rr_resource != RACCT_PCTCPU) 441 continue; 442 if (rule->rr_action != RCTL_ACTION_DENY) 443 continue; 444 available = rctl_available_resource(p, rule); 445 if (available < minavailable) { 446 minavailable = available; 447 limit = rule->rr_amount; 448 } 449 } 450 451 /* 452 * Return slightly less than actual value of the available 453 * %cpu resource. 
This makes %cpu throttling more aggressive 454 * and lets us act sooner than the limits are already exceeded. 455 */ 456 if (limit != 0) { 457 if (limit > 2 * RCTL_PCPU_SHIFT) 458 minavailable -= RCTL_PCPU_SHIFT; 459 else 460 minavailable -= (limit / 2); 461 } 462 463 return (minavailable); 464 } 465 466 static uint64_t 467 xadd(uint64_t a, uint64_t b) 468 { 469 uint64_t c; 470 471 c = a + b; 472 473 /* 474 * Detect overflow. 475 */ 476 if (c < a || c < b) 477 return (UINT64_MAX); 478 479 return (c); 480 } 481 482 static uint64_t 483 xmul(uint64_t a, uint64_t b) 484 { 485 486 if (b != 0 && a > UINT64_MAX / b) 487 return (UINT64_MAX); 488 489 return (a * b); 490 } 491 492 /* 493 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 494 * to what it keeps allocated now. Returns non-zero if the allocation should 495 * be denied, 0 otherwise. 496 */ 497 int 498 rctl_enforce(struct proc *p, int resource, uint64_t amount) 499 { 500 static struct timeval log_lasttime, devctl_lasttime; 501 static int log_curtime = 0, devctl_curtime = 0; 502 struct rctl_rule *rule; 503 struct rctl_rule_link *link; 504 struct sbuf sb; 505 char *buf; 506 int64_t available; 507 uint64_t sleep_ms, sleep_ratio; 508 int should_deny = 0; 509 510 ASSERT_RACCT_ENABLED(); 511 RACCT_LOCK_ASSERT(); 512 513 /* 514 * There may be more than one matching rule; go through all of them. 515 * Denial should be done last, after logging and sending signals. 516 */ 517 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 518 rule = link->rrl_rule; 519 if (rule->rr_resource != resource) 520 continue; 521 522 available = rctl_available_resource(p, rule); 523 if (available >= (int64_t)amount) { 524 link->rrl_exceeded = 0; 525 continue; 526 } 527 528 switch (rule->rr_action) { 529 case RCTL_ACTION_DENY: 530 should_deny = 1; 531 continue; 532 case RCTL_ACTION_LOG: 533 /* 534 * If rrl_exceeded != 0, it means we've already 535 * logged a warning for this process. 
536 */ 537 if (link->rrl_exceeded != 0) 538 continue; 539 540 /* 541 * If the process state is not fully initialized yet, 542 * we can't access most of the required fields, e.g. 543 * p->p_comm. This happens when called from fork1(). 544 * Ignore this rule for now; it will be processed just 545 * after fork, when called from racct_proc_fork_done(). 546 */ 547 if (p->p_state != PRS_NORMAL) 548 continue; 549 550 if (!ppsratecheck(&log_lasttime, &log_curtime, 551 rctl_log_rate_limit)) 552 continue; 553 554 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 555 if (buf == NULL) { 556 printf("rctl_enforce: out of memory\n"); 557 continue; 558 } 559 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 560 rctl_rule_to_sbuf(&sb, rule); 561 sbuf_finish(&sb); 562 printf("rctl: rule \"%s\" matched by pid %d " 563 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 564 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 565 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 566 sbuf_delete(&sb); 567 free(buf, M_RCTL); 568 link->rrl_exceeded = 1; 569 continue; 570 case RCTL_ACTION_DEVCTL: 571 if (link->rrl_exceeded != 0) 572 continue; 573 574 if (p->p_state != PRS_NORMAL) 575 continue; 576 577 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 578 rctl_devctl_rate_limit)) 579 continue; 580 581 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 582 if (buf == NULL) { 583 printf("rctl_enforce: out of memory\n"); 584 continue; 585 } 586 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 587 sbuf_printf(&sb, "rule="); 588 rctl_rule_to_sbuf(&sb, rule); 589 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 590 p->p_pid, p->p_ucred->cr_ruid, 591 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 592 sbuf_finish(&sb); 593 devctl_notify("RCTL", "rule", "matched", 594 sbuf_data(&sb)); 595 sbuf_delete(&sb); 596 free(buf, M_RCTL); 597 link->rrl_exceeded = 1; 598 continue; 599 case RCTL_ACTION_THROTTLE: 600 if (p->p_state != PRS_NORMAL) 601 continue; 602 603 if (rule->rr_amount == 0) { 604 racct_proc_throttle(p, 
rctl_throttle_max); 605 continue; 606 } 607 608 /* 609 * Make the process sleep for a fraction of second 610 * proportional to the ratio of process' resource 611 * utilization compared to the limit. The point is 612 * to penalize resource hogs: processes that consume 613 * more of the available resources sleep for longer. 614 * 615 * We're trying to defer division until the very end, 616 * to minimize the rounding effects. The following 617 * calculation could have been written in a clearer 618 * way like this: 619 * 620 * sleep_ms = hz * p->p_racct->r_resources[resource] / 621 * rule->rr_amount; 622 * sleep_ms *= rctl_throttle_pct / 100; 623 * if (sleep_ms < rctl_throttle_min) 624 * sleep_ms = rctl_throttle_min; 625 * 626 */ 627 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 628 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 629 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 630 sleep_ms = rctl_throttle_min * rule->rr_amount; 631 632 /* 633 * Multiply that by the ratio of the resource 634 * consumption for the container compared to the limit, 635 * squared. In other words, a process in a container 636 * that is two times over the limit will be throttled 637 * four times as much for hitting the same rule. The 638 * point is to penalize processes more if the container 639 * itself (eg certain UID or jail) is above the limit. 640 */ 641 if (available < 0) 642 sleep_ratio = -available / rule->rr_amount; 643 else 644 sleep_ratio = 0; 645 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 646 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 647 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 648 649 /* 650 * Finally the division. 
651 */ 652 sleep_ms /= rule->rr_amount; 653 654 if (sleep_ms > rctl_throttle_max) 655 sleep_ms = rctl_throttle_max; 656 #if 0 657 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 658 __func__, p->p_pid, p->p_comm, 659 p->p_racct->r_resources[resource], 660 rule->rr_amount, (uintmax_t)sleep_ms, 661 (uintmax_t)sleep_ratio, (intmax_t)available); 662 #endif 663 664 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 665 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 666 racct_proc_throttle(p, sleep_ms); 667 continue; 668 default: 669 if (link->rrl_exceeded != 0) 670 continue; 671 672 if (p->p_state != PRS_NORMAL) 673 continue; 674 675 KASSERT(rule->rr_action > 0 && 676 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 677 ("rctl_enforce: unknown action %d", 678 rule->rr_action)); 679 680 /* 681 * We're using the fact that RCTL_ACTION_SIG* values 682 * are equal to their counterparts from sys/signal.h. 683 */ 684 kern_psignal(p, rule->rr_action); 685 link->rrl_exceeded = 1; 686 continue; 687 } 688 } 689 690 if (should_deny) { 691 /* 692 * Return fake error code; the caller should change it 693 * into one proper for the situation - EFSIZ, ENOMEM etc. 694 */ 695 return (EDOOFUS); 696 } 697 698 return (0); 699 } 700 701 uint64_t 702 rctl_get_limit(struct proc *p, int resource) 703 { 704 struct rctl_rule *rule; 705 struct rctl_rule_link *link; 706 uint64_t amount = UINT64_MAX; 707 708 ASSERT_RACCT_ENABLED(); 709 RACCT_LOCK_ASSERT(); 710 711 /* 712 * There may be more than one matching rule; go through all of them. 713 * Denial should be done last, after logging and sending signals. 
714 */ 715 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 716 rule = link->rrl_rule; 717 if (rule->rr_resource != resource) 718 continue; 719 if (rule->rr_action != RCTL_ACTION_DENY) 720 continue; 721 if (rule->rr_amount < amount) 722 amount = rule->rr_amount; 723 } 724 725 return (amount); 726 } 727 728 uint64_t 729 rctl_get_available(struct proc *p, int resource) 730 { 731 struct rctl_rule *rule; 732 struct rctl_rule_link *link; 733 int64_t available, minavailable, allocated; 734 735 minavailable = INT64_MAX; 736 737 ASSERT_RACCT_ENABLED(); 738 RACCT_LOCK_ASSERT(); 739 740 /* 741 * There may be more than one matching rule; go through all of them. 742 * Denial should be done last, after logging and sending signals. 743 */ 744 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 745 rule = link->rrl_rule; 746 if (rule->rr_resource != resource) 747 continue; 748 if (rule->rr_action != RCTL_ACTION_DENY) 749 continue; 750 available = rctl_available_resource(p, rule); 751 if (available < minavailable) 752 minavailable = available; 753 } 754 755 /* 756 * XXX: Think about this _hard_. 
757 */ 758 allocated = p->p_racct->r_resources[resource]; 759 if (minavailable < INT64_MAX - allocated) 760 minavailable += allocated; 761 if (minavailable < 0) 762 minavailable = 0; 763 764 return (minavailable); 765 } 766 767 static int 768 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 769 { 770 771 ASSERT_RACCT_ENABLED(); 772 773 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 774 if (rule->rr_subject_type != filter->rr_subject_type) 775 return (0); 776 777 switch (filter->rr_subject_type) { 778 case RCTL_SUBJECT_TYPE_PROCESS: 779 if (filter->rr_subject.rs_proc != NULL && 780 rule->rr_subject.rs_proc != 781 filter->rr_subject.rs_proc) 782 return (0); 783 break; 784 case RCTL_SUBJECT_TYPE_USER: 785 if (filter->rr_subject.rs_uip != NULL && 786 rule->rr_subject.rs_uip != 787 filter->rr_subject.rs_uip) 788 return (0); 789 break; 790 case RCTL_SUBJECT_TYPE_LOGINCLASS: 791 if (filter->rr_subject.rs_loginclass != NULL && 792 rule->rr_subject.rs_loginclass != 793 filter->rr_subject.rs_loginclass) 794 return (0); 795 break; 796 case RCTL_SUBJECT_TYPE_JAIL: 797 if (filter->rr_subject.rs_prison_racct != NULL && 798 rule->rr_subject.rs_prison_racct != 799 filter->rr_subject.rs_prison_racct) 800 return (0); 801 break; 802 default: 803 panic("rctl_rule_matches: unknown subject type %d", 804 filter->rr_subject_type); 805 } 806 } 807 808 if (filter->rr_resource != RACCT_UNDEFINED) { 809 if (rule->rr_resource != filter->rr_resource) 810 return (0); 811 } 812 813 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 814 if (rule->rr_action != filter->rr_action) 815 return (0); 816 } 817 818 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 819 if (rule->rr_amount != filter->rr_amount) 820 return (0); 821 } 822 823 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 824 if (rule->rr_per != filter->rr_per) 825 return (0); 826 } 827 828 return (1); 829 } 830 831 static int 832 str2value(const char *str, int *value, struct dict *table) 833 
{ 834 int i; 835 836 if (value == NULL) 837 return (EINVAL); 838 839 for (i = 0; table[i].d_name != NULL; i++) { 840 if (strcasecmp(table[i].d_name, str) == 0) { 841 *value = table[i].d_value; 842 return (0); 843 } 844 } 845 846 return (EINVAL); 847 } 848 849 static int 850 str2id(const char *str, id_t *value) 851 { 852 char *end; 853 854 if (str == NULL) 855 return (EINVAL); 856 857 *value = strtoul(str, &end, 10); 858 if ((size_t)(end - str) != strlen(str)) 859 return (EINVAL); 860 861 return (0); 862 } 863 864 static int 865 str2int64(const char *str, int64_t *value) 866 { 867 char *end; 868 869 if (str == NULL) 870 return (EINVAL); 871 872 *value = strtoul(str, &end, 10); 873 if ((size_t)(end - str) != strlen(str)) 874 return (EINVAL); 875 876 if (*value < 0) 877 return (ERANGE); 878 879 return (0); 880 } 881 882 /* 883 * Connect the rule to the racct, increasing refcount for the rule. 884 */ 885 static void 886 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 887 { 888 struct rctl_rule_link *link; 889 890 ASSERT_RACCT_ENABLED(); 891 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 892 893 rctl_rule_acquire(rule); 894 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 895 link->rrl_rule = rule; 896 link->rrl_exceeded = 0; 897 898 RACCT_LOCK(); 899 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 900 RACCT_UNLOCK(); 901 } 902 903 static int 904 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 905 { 906 struct rctl_rule_link *link; 907 908 ASSERT_RACCT_ENABLED(); 909 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 910 RACCT_LOCK_ASSERT(); 911 912 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 913 if (link == NULL) 914 return (ENOMEM); 915 rctl_rule_acquire(rule); 916 link->rrl_rule = rule; 917 link->rrl_exceeded = 0; 918 919 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 920 921 return (0); 922 } 923 924 /* 925 * Remove limits for a rules matching the filter 
and release 926 * the refcounts for the rules, possibly freeing them. Returns 927 * the number of limit structures removed. 928 */ 929 static int 930 rctl_racct_remove_rules(struct racct *racct, 931 const struct rctl_rule *filter) 932 { 933 struct rctl_rule_link *link, *linktmp; 934 int removed = 0; 935 936 ASSERT_RACCT_ENABLED(); 937 RACCT_LOCK_ASSERT(); 938 939 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 940 if (!rctl_rule_matches(link->rrl_rule, filter)) 941 continue; 942 943 LIST_REMOVE(link, rrl_next); 944 rctl_rule_release(link->rrl_rule); 945 uma_zfree(rctl_rule_link_zone, link); 946 removed++; 947 } 948 return (removed); 949 } 950 951 static void 952 rctl_rule_acquire_subject(struct rctl_rule *rule) 953 { 954 955 ASSERT_RACCT_ENABLED(); 956 957 switch (rule->rr_subject_type) { 958 case RCTL_SUBJECT_TYPE_UNDEFINED: 959 case RCTL_SUBJECT_TYPE_PROCESS: 960 break; 961 case RCTL_SUBJECT_TYPE_JAIL: 962 if (rule->rr_subject.rs_prison_racct != NULL) 963 prison_racct_hold(rule->rr_subject.rs_prison_racct); 964 break; 965 case RCTL_SUBJECT_TYPE_USER: 966 if (rule->rr_subject.rs_uip != NULL) 967 uihold(rule->rr_subject.rs_uip); 968 break; 969 case RCTL_SUBJECT_TYPE_LOGINCLASS: 970 if (rule->rr_subject.rs_loginclass != NULL) 971 loginclass_hold(rule->rr_subject.rs_loginclass); 972 break; 973 default: 974 panic("rctl_rule_acquire_subject: unknown subject type %d", 975 rule->rr_subject_type); 976 } 977 } 978 979 static void 980 rctl_rule_release_subject(struct rctl_rule *rule) 981 { 982 983 ASSERT_RACCT_ENABLED(); 984 985 switch (rule->rr_subject_type) { 986 case RCTL_SUBJECT_TYPE_UNDEFINED: 987 case RCTL_SUBJECT_TYPE_PROCESS: 988 break; 989 case RCTL_SUBJECT_TYPE_JAIL: 990 if (rule->rr_subject.rs_prison_racct != NULL) 991 prison_racct_free(rule->rr_subject.rs_prison_racct); 992 break; 993 case RCTL_SUBJECT_TYPE_USER: 994 if (rule->rr_subject.rs_uip != NULL) 995 uifree(rule->rr_subject.rs_uip); 996 break; 997 case RCTL_SUBJECT_TYPE_LOGINCLASS: 998 
if (rule->rr_subject.rs_loginclass != NULL) 999 loginclass_free(rule->rr_subject.rs_loginclass); 1000 break; 1001 default: 1002 panic("rctl_rule_release_subject: unknown subject type %d", 1003 rule->rr_subject_type); 1004 } 1005 } 1006 1007 struct rctl_rule * 1008 rctl_rule_alloc(int flags) 1009 { 1010 struct rctl_rule *rule; 1011 1012 ASSERT_RACCT_ENABLED(); 1013 1014 rule = uma_zalloc(rctl_rule_zone, flags); 1015 if (rule == NULL) 1016 return (NULL); 1017 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1018 rule->rr_subject.rs_proc = NULL; 1019 rule->rr_subject.rs_uip = NULL; 1020 rule->rr_subject.rs_loginclass = NULL; 1021 rule->rr_subject.rs_prison_racct = NULL; 1022 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1023 rule->rr_resource = RACCT_UNDEFINED; 1024 rule->rr_action = RCTL_ACTION_UNDEFINED; 1025 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1026 refcount_init(&rule->rr_refcount, 1); 1027 1028 return (rule); 1029 } 1030 1031 struct rctl_rule * 1032 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1033 { 1034 struct rctl_rule *copy; 1035 1036 ASSERT_RACCT_ENABLED(); 1037 1038 copy = uma_zalloc(rctl_rule_zone, flags); 1039 if (copy == NULL) 1040 return (NULL); 1041 copy->rr_subject_type = rule->rr_subject_type; 1042 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1043 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1044 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1045 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1046 copy->rr_per = rule->rr_per; 1047 copy->rr_resource = rule->rr_resource; 1048 copy->rr_action = rule->rr_action; 1049 copy->rr_amount = rule->rr_amount; 1050 refcount_init(©->rr_refcount, 1); 1051 rctl_rule_acquire_subject(copy); 1052 1053 return (copy); 1054 } 1055 1056 void 1057 rctl_rule_acquire(struct rctl_rule *rule) 1058 { 1059 1060 ASSERT_RACCT_ENABLED(); 1061 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1062 1063 refcount_acquire(&rule->rr_refcount); 1064 } 

/*
 * Taskqueue callback that destroys a rule whose refcount has dropped
 * to zero: releases the reference on the rule's subject and returns
 * the rule to its zone.  'context' is the rule; 'pending' is unused.
 */
static void
rctl_rule_free(void *context, int pending)
{
	struct rctl_rule *rule;

	rule = (struct rctl_rule *)context;

	ASSERT_RACCT_ENABLED();
	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));

	/*
	 * We don't need locking here; rule is guaranteed to be inaccessible.
	 */

	rctl_rule_release_subject(rule);
	uma_zfree(rctl_rule_zone, rule);
}

/*
 * Drop a reference on the rule.  When the last reference goes away,
 * the actual freeing is deferred to rctl_rule_free() via taskqueue_thread.
 */
void
rctl_rule_release(struct rctl_rule *rule)
{

	ASSERT_RACCT_ENABLED();
	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));

	if (refcount_release(&rule->rr_refcount)) {
		/*
		 * rctl_rule_release() is often called when iterating
		 * over all the uidinfo structures in the system,
		 * holding uihashtbl_lock.  Since rctl_rule_free()
		 * might end up calling uifree(), this would lead
		 * to lock recursion.  Use taskqueue to avoid this.
		 */
		TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
		taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
	}
}

/*
 * Return 1 when the rule has a subject type with a non-NULL subject and
 * defined resource, action, amount and 'per' fields; 0 otherwise.
 * Panics on an unknown subject type.
 */
static int
rctl_rule_fully_specified(const struct rctl_rule *rule)
{

	ASSERT_RACCT_ENABLED();

	switch (rule->rr_subject_type) {
	case RCTL_SUBJECT_TYPE_UNDEFINED:
		return (0);
	case RCTL_SUBJECT_TYPE_PROCESS:
		if (rule->rr_subject.rs_proc == NULL)
			return (0);
		break;
	case RCTL_SUBJECT_TYPE_USER:
		if (rule->rr_subject.rs_uip == NULL)
			return (0);
		break;
	case RCTL_SUBJECT_TYPE_LOGINCLASS:
		if (rule->rr_subject.rs_loginclass == NULL)
			return (0);
		break;
	case RCTL_SUBJECT_TYPE_JAIL:
		if (rule->rr_subject.rs_prison_racct == NULL)
			return (0);
		break;
	default:
		panic("rctl_rule_fully_specified: unknown subject type %d",
		    rule->rr_subject_type);
	}
	if (rule->rr_resource == RACCT_UNDEFINED)
		return (0);
	if (rule->rr_action == RCTL_ACTION_UNDEFINED)
		return (0);
	if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
		return (0);
	if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
		return (0);

	return (1);
}

/*
 * Parse a textual rule into a newly allocated rule.  'rulestr' is
 * modified in place by strsep(); its fields are separated by ':',
 * '=' and '/'.
 */
static int
rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
{
	struct rctl_rule *rule;
	char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
	    *amountstr, *perstr;
	id_t id;
	int error = 0;

	ASSERT_RACCT_ENABLED();

	rule = rctl_rule_alloc(M_WAITOK);

	/* Split the string into its fields; whatever is left is 'per'. */
	subjectstr = strsep(&rulestr, ":");
	subject_idstr = strsep(&rulestr, ":");
	resourcestr = strsep(&rulestr, ":");
	actionstr = strsep(&rulestr, "=/");
	amountstr = strsep(&rulestr, "/");
	perstr = rulestr;

	if (subjectstr == NULL || subjectstr[0] == '\0')
		rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
	else {
		error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
		if (error != 0)
			goto out;
	}

	if (subject_idstr == NULL || subject_idstr[0] == '\0') {
		rule->rr_subject.rs_proc = NULL;
		rule->rr_subject.rs_uip = NULL;
		rule->rr_subject.rs_loginclass = NULL;
		rule->rr_subject.rs_prison_racct = NULL;
	} else {
		switch (rule->rr_subject_type) {
		case RCTL_SUBJECT_TYPE_UNDEFINED:
			/* A subject id without a subject type is invalid. */
			error = EINVAL;
			goto out;
		case RCTL_SUBJECT_TYPE_PROCESS:
			error = str2id(subject_idstr, &id);
			if (error != 0)
				goto out;
			sx_assert(&allproc_lock, SA_LOCKED);
			rule->rr_subject.rs_proc = pfind(id);
			if (rule->rr_subject.rs_proc == NULL) {
				error = ESRCH;
				goto out;
			}
			PROC_UNLOCK(rule->rr_subject.rs_proc);
			break;
		case RCTL_SUBJECT_TYPE_USER:
			error = str2id(subject_idstr, &id);
			if (error != 0)
				goto out;
			rule->rr_subject.rs_uip = uifind(id);
			break;
		case RCTL_SUBJECT_TYPE_LOGINCLASS:
			rule->rr_subject.rs_loginclass =
			    loginclass_find(subject_idstr);
			if (rule->rr_subject.rs_loginclass == NULL)
{ 1205 error = ENAMETOOLONG; 1206 goto out; 1207 } 1208 break; 1209 case RCTL_SUBJECT_TYPE_JAIL: 1210 rule->rr_subject.rs_prison_racct = 1211 prison_racct_find(subject_idstr); 1212 if (rule->rr_subject.rs_prison_racct == NULL) { 1213 error = ENAMETOOLONG; 1214 goto out; 1215 } 1216 break; 1217 default: 1218 panic("rctl_string_to_rule: unknown subject type %d", 1219 rule->rr_subject_type); 1220 } 1221 } 1222 1223 if (resourcestr == NULL || resourcestr[0] == '\0') 1224 rule->rr_resource = RACCT_UNDEFINED; 1225 else { 1226 error = str2value(resourcestr, &rule->rr_resource, 1227 resourcenames); 1228 if (error != 0) 1229 goto out; 1230 } 1231 1232 if (actionstr == NULL || actionstr[0] == '\0') 1233 rule->rr_action = RCTL_ACTION_UNDEFINED; 1234 else { 1235 error = str2value(actionstr, &rule->rr_action, actionnames); 1236 if (error != 0) 1237 goto out; 1238 } 1239 1240 if (amountstr == NULL || amountstr[0] == '\0') 1241 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1242 else { 1243 error = str2int64(amountstr, &rule->rr_amount); 1244 if (error != 0) 1245 goto out; 1246 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1247 if (rule->rr_amount > INT64_MAX / 1000000) { 1248 error = ERANGE; 1249 goto out; 1250 } 1251 rule->rr_amount *= 1000000; 1252 } 1253 } 1254 1255 if (perstr == NULL || perstr[0] == '\0') 1256 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1257 else { 1258 error = str2value(perstr, &rule->rr_per, subjectnames); 1259 if (error != 0) 1260 goto out; 1261 } 1262 1263 out: 1264 if (error == 0) 1265 *rulep = rule; 1266 else 1267 rctl_rule_release(rule); 1268 1269 return (error); 1270 } 1271 1272 /* 1273 * Link a rule with all the subjects it applies to. 
1274 */ 1275 int 1276 rctl_rule_add(struct rctl_rule *rule) 1277 { 1278 struct proc *p; 1279 struct ucred *cred; 1280 struct uidinfo *uip; 1281 struct prison *pr; 1282 struct prison_racct *prr; 1283 struct loginclass *lc; 1284 struct rctl_rule *rule2; 1285 int match; 1286 1287 ASSERT_RACCT_ENABLED(); 1288 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1289 1290 /* 1291 * Some rules just don't make sense, like "deny" rule for an undeniable 1292 * resource. The exception are the RSS and %CPU resources - they are 1293 * not deniable in the racct sense, but the limit is enforced in 1294 * a different way. 1295 */ 1296 if (rule->rr_action == RCTL_ACTION_DENY && 1297 !RACCT_IS_DENIABLE(rule->rr_resource) && 1298 rule->rr_resource != RACCT_RSS && 1299 rule->rr_resource != RACCT_PCTCPU) { 1300 return (EOPNOTSUPP); 1301 } 1302 1303 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1304 !RACCT_IS_DECAYING(rule->rr_resource)) { 1305 return (EOPNOTSUPP); 1306 } 1307 1308 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1309 rule->rr_resource == RACCT_PCTCPU) { 1310 return (EOPNOTSUPP); 1311 } 1312 1313 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1314 RACCT_IS_SLOPPY(rule->rr_resource)) { 1315 return (EOPNOTSUPP); 1316 } 1317 1318 /* 1319 * Make sure there are no duplicated rules. Also, for the "deny" 1320 * rules, remove ones differing only by "amount". 1321 */ 1322 if (rule->rr_action == RCTL_ACTION_DENY) { 1323 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1324 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1325 rctl_rule_remove(rule2); 1326 rctl_rule_release(rule2); 1327 } else 1328 rctl_rule_remove(rule); 1329 1330 switch (rule->rr_subject_type) { 1331 case RCTL_SUBJECT_TYPE_PROCESS: 1332 p = rule->rr_subject.rs_proc; 1333 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1334 1335 rctl_racct_add_rule(p->p_racct, rule); 1336 /* 1337 * In case of per-process rule, we don't have anything more 1338 * to do. 
1339 */ 1340 return (0); 1341 1342 case RCTL_SUBJECT_TYPE_USER: 1343 uip = rule->rr_subject.rs_uip; 1344 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1345 rctl_racct_add_rule(uip->ui_racct, rule); 1346 break; 1347 1348 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1349 lc = rule->rr_subject.rs_loginclass; 1350 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1351 rctl_racct_add_rule(lc->lc_racct, rule); 1352 break; 1353 1354 case RCTL_SUBJECT_TYPE_JAIL: 1355 prr = rule->rr_subject.rs_prison_racct; 1356 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1357 rctl_racct_add_rule(prr->prr_racct, rule); 1358 break; 1359 1360 default: 1361 panic("rctl_rule_add: unknown subject type %d", 1362 rule->rr_subject_type); 1363 } 1364 1365 /* 1366 * Now go through all the processes and add the new rule to the ones 1367 * it applies to. 1368 */ 1369 sx_assert(&allproc_lock, SA_LOCKED); 1370 FOREACH_PROC_IN_SYSTEM(p) { 1371 cred = p->p_ucred; 1372 switch (rule->rr_subject_type) { 1373 case RCTL_SUBJECT_TYPE_USER: 1374 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1375 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1376 break; 1377 continue; 1378 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1379 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1380 break; 1381 continue; 1382 case RCTL_SUBJECT_TYPE_JAIL: 1383 match = 0; 1384 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1385 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1386 match = 1; 1387 break; 1388 } 1389 } 1390 if (match) 1391 break; 1392 continue; 1393 default: 1394 panic("rctl_rule_add: unknown subject type %d", 1395 rule->rr_subject_type); 1396 } 1397 1398 rctl_racct_add_rule(p->p_racct, rule); 1399 } 1400 1401 return (0); 1402 } 1403 1404 static void 1405 rctl_rule_pre_callback(void) 1406 { 1407 1408 RACCT_LOCK(); 1409 } 1410 1411 static void 1412 rctl_rule_post_callback(void) 1413 { 1414 1415 RACCT_UNLOCK(); 1416 } 1417 1418 static void 1419 rctl_rule_remove_callback(struct racct 
*racct, void *arg2, void *arg3) 1420 { 1421 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1422 int found = 0; 1423 1424 ASSERT_RACCT_ENABLED(); 1425 RACCT_LOCK_ASSERT(); 1426 1427 found += rctl_racct_remove_rules(racct, filter); 1428 1429 *((int *)arg3) += found; 1430 } 1431 1432 /* 1433 * Remove all rules that match the filter. 1434 */ 1435 int 1436 rctl_rule_remove(struct rctl_rule *filter) 1437 { 1438 struct proc *p; 1439 int found = 0; 1440 1441 ASSERT_RACCT_ENABLED(); 1442 1443 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1444 filter->rr_subject.rs_proc != NULL) { 1445 p = filter->rr_subject.rs_proc; 1446 RACCT_LOCK(); 1447 found = rctl_racct_remove_rules(p->p_racct, filter); 1448 RACCT_UNLOCK(); 1449 if (found) 1450 return (0); 1451 return (ESRCH); 1452 } 1453 1454 loginclass_racct_foreach(rctl_rule_remove_callback, 1455 rctl_rule_pre_callback, rctl_rule_post_callback, 1456 filter, (void *)&found); 1457 ui_racct_foreach(rctl_rule_remove_callback, 1458 rctl_rule_pre_callback, rctl_rule_post_callback, 1459 filter, (void *)&found); 1460 prison_racct_foreach(rctl_rule_remove_callback, 1461 rctl_rule_pre_callback, rctl_rule_post_callback, 1462 filter, (void *)&found); 1463 1464 sx_assert(&allproc_lock, SA_LOCKED); 1465 RACCT_LOCK(); 1466 FOREACH_PROC_IN_SYSTEM(p) { 1467 found += rctl_racct_remove_rules(p->p_racct, filter); 1468 } 1469 RACCT_UNLOCK(); 1470 1471 if (found) 1472 return (0); 1473 return (ESRCH); 1474 } 1475 1476 /* 1477 * Appends a rule to the sbuf. 
1478 */ 1479 static void 1480 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1481 { 1482 int64_t amount; 1483 1484 ASSERT_RACCT_ENABLED(); 1485 1486 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1487 1488 switch (rule->rr_subject_type) { 1489 case RCTL_SUBJECT_TYPE_PROCESS: 1490 if (rule->rr_subject.rs_proc == NULL) 1491 sbuf_printf(sb, ":"); 1492 else 1493 sbuf_printf(sb, "%d:", 1494 rule->rr_subject.rs_proc->p_pid); 1495 break; 1496 case RCTL_SUBJECT_TYPE_USER: 1497 if (rule->rr_subject.rs_uip == NULL) 1498 sbuf_printf(sb, ":"); 1499 else 1500 sbuf_printf(sb, "%d:", 1501 rule->rr_subject.rs_uip->ui_uid); 1502 break; 1503 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1504 if (rule->rr_subject.rs_loginclass == NULL) 1505 sbuf_printf(sb, ":"); 1506 else 1507 sbuf_printf(sb, "%s:", 1508 rule->rr_subject.rs_loginclass->lc_name); 1509 break; 1510 case RCTL_SUBJECT_TYPE_JAIL: 1511 if (rule->rr_subject.rs_prison_racct == NULL) 1512 sbuf_printf(sb, ":"); 1513 else 1514 sbuf_printf(sb, "%s:", 1515 rule->rr_subject.rs_prison_racct->prr_name); 1516 break; 1517 default: 1518 panic("rctl_rule_to_sbuf: unknown subject type %d", 1519 rule->rr_subject_type); 1520 } 1521 1522 amount = rule->rr_amount; 1523 if (amount != RCTL_AMOUNT_UNDEFINED && 1524 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1525 amount /= 1000000; 1526 1527 sbuf_printf(sb, "%s:%s=%jd", 1528 rctl_resource_name(rule->rr_resource), 1529 rctl_action_name(rule->rr_action), 1530 amount); 1531 1532 if (rule->rr_per != rule->rr_subject_type) 1533 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1534 } 1535 1536 /* 1537 * Routine used by RCTL syscalls to read in input string. 
1538 */ 1539 static int 1540 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1541 { 1542 char *str; 1543 int error; 1544 1545 ASSERT_RACCT_ENABLED(); 1546 1547 if (inbuflen <= 0) 1548 return (EINVAL); 1549 if (inbuflen > RCTL_MAX_INBUFSIZE) 1550 return (E2BIG); 1551 1552 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1553 error = copyinstr(inbufp, str, inbuflen, NULL); 1554 if (error != 0) { 1555 free(str, M_RCTL); 1556 return (error); 1557 } 1558 1559 *inputstr = str; 1560 1561 return (0); 1562 } 1563 1564 /* 1565 * Routine used by RCTL syscalls to write out output string. 1566 */ 1567 static int 1568 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1569 { 1570 int error; 1571 1572 ASSERT_RACCT_ENABLED(); 1573 1574 if (outputsbuf == NULL) 1575 return (0); 1576 1577 sbuf_finish(outputsbuf); 1578 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1579 sbuf_delete(outputsbuf); 1580 return (ERANGE); 1581 } 1582 error = copyout(sbuf_data(outputsbuf), outbufp, 1583 sbuf_len(outputsbuf) + 1); 1584 sbuf_delete(outputsbuf); 1585 return (error); 1586 } 1587 1588 static struct sbuf * 1589 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1590 { 1591 struct sbuf *sb; 1592 int64_t amount; 1593 int i; 1594 1595 ASSERT_RACCT_ENABLED(); 1596 1597 sb = sbuf_new_auto(); 1598 for (i = 0; i <= RACCT_MAX; i++) { 1599 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1600 continue; 1601 RACCT_LOCK(); 1602 amount = racct->r_resources[i]; 1603 RACCT_UNLOCK(); 1604 if (RACCT_IS_IN_MILLIONS(i)) 1605 amount /= 1000000; 1606 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1607 } 1608 sbuf_setpos(sb, sbuf_len(sb) - 1); 1609 return (sb); 1610 } 1611 1612 int 1613 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1614 { 1615 struct rctl_rule *filter; 1616 struct sbuf *outputsbuf = NULL; 1617 struct proc *p; 1618 struct uidinfo *uip; 1619 struct loginclass *lc; 1620 struct prison_racct *prr; 1621 char *inputstr; 1622 int error; 1623 
1624 if (!racct_enable) 1625 return (ENOSYS); 1626 1627 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1628 if (error != 0) 1629 return (error); 1630 1631 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1632 if (error != 0) 1633 return (error); 1634 1635 sx_slock(&allproc_lock); 1636 error = rctl_string_to_rule(inputstr, &filter); 1637 free(inputstr, M_RCTL); 1638 if (error != 0) { 1639 sx_sunlock(&allproc_lock); 1640 return (error); 1641 } 1642 1643 switch (filter->rr_subject_type) { 1644 case RCTL_SUBJECT_TYPE_PROCESS: 1645 p = filter->rr_subject.rs_proc; 1646 if (p == NULL) { 1647 error = EINVAL; 1648 goto out; 1649 } 1650 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1651 break; 1652 case RCTL_SUBJECT_TYPE_USER: 1653 uip = filter->rr_subject.rs_uip; 1654 if (uip == NULL) { 1655 error = EINVAL; 1656 goto out; 1657 } 1658 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1659 break; 1660 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1661 lc = filter->rr_subject.rs_loginclass; 1662 if (lc == NULL) { 1663 error = EINVAL; 1664 goto out; 1665 } 1666 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1667 break; 1668 case RCTL_SUBJECT_TYPE_JAIL: 1669 prr = filter->rr_subject.rs_prison_racct; 1670 if (prr == NULL) { 1671 error = EINVAL; 1672 goto out; 1673 } 1674 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1675 break; 1676 default: 1677 error = EINVAL; 1678 } 1679 out: 1680 rctl_rule_release(filter); 1681 sx_sunlock(&allproc_lock); 1682 if (error != 0) 1683 return (error); 1684 1685 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1686 1687 return (error); 1688 } 1689 1690 static void 1691 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1692 { 1693 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1694 struct rctl_rule_link *link; 1695 struct sbuf *sb = (struct sbuf *)arg3; 1696 1697 ASSERT_RACCT_ENABLED(); 1698 RACCT_LOCK_ASSERT(); 1699 1700 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1701 if 
(!rctl_rule_matches(link->rrl_rule, filter)) 1702 continue; 1703 rctl_rule_to_sbuf(sb, link->rrl_rule); 1704 sbuf_printf(sb, ","); 1705 } 1706 } 1707 1708 int 1709 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1710 { 1711 struct sbuf *sb; 1712 struct rctl_rule *filter; 1713 struct rctl_rule_link *link; 1714 struct proc *p; 1715 char *inputstr, *buf; 1716 size_t bufsize; 1717 int error; 1718 1719 if (!racct_enable) 1720 return (ENOSYS); 1721 1722 error = priv_check(td, PRIV_RCTL_GET_RULES); 1723 if (error != 0) 1724 return (error); 1725 1726 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1727 if (error != 0) 1728 return (error); 1729 1730 sx_slock(&allproc_lock); 1731 error = rctl_string_to_rule(inputstr, &filter); 1732 free(inputstr, M_RCTL); 1733 if (error != 0) { 1734 sx_sunlock(&allproc_lock); 1735 return (error); 1736 } 1737 1738 bufsize = uap->outbuflen; 1739 if (bufsize > rctl_maxbufsize) { 1740 sx_sunlock(&allproc_lock); 1741 return (E2BIG); 1742 } 1743 1744 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1745 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1746 KASSERT(sb != NULL, ("sbuf_new failed")); 1747 1748 FOREACH_PROC_IN_SYSTEM(p) { 1749 RACCT_LOCK(); 1750 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1751 /* 1752 * Non-process rules will be added to the buffer later. 1753 * Adding them here would result in duplicated output. 
1754 */ 1755 if (link->rrl_rule->rr_subject_type != 1756 RCTL_SUBJECT_TYPE_PROCESS) 1757 continue; 1758 if (!rctl_rule_matches(link->rrl_rule, filter)) 1759 continue; 1760 rctl_rule_to_sbuf(sb, link->rrl_rule); 1761 sbuf_printf(sb, ","); 1762 } 1763 RACCT_UNLOCK(); 1764 } 1765 1766 loginclass_racct_foreach(rctl_get_rules_callback, 1767 rctl_rule_pre_callback, rctl_rule_post_callback, 1768 filter, sb); 1769 ui_racct_foreach(rctl_get_rules_callback, 1770 rctl_rule_pre_callback, rctl_rule_post_callback, 1771 filter, sb); 1772 prison_racct_foreach(rctl_get_rules_callback, 1773 rctl_rule_pre_callback, rctl_rule_post_callback, 1774 filter, sb); 1775 if (sbuf_error(sb) == ENOMEM) { 1776 error = ERANGE; 1777 goto out; 1778 } 1779 1780 /* 1781 * Remove trailing ",". 1782 */ 1783 if (sbuf_len(sb) > 0) 1784 sbuf_setpos(sb, sbuf_len(sb) - 1); 1785 1786 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1787 out: 1788 rctl_rule_release(filter); 1789 sx_sunlock(&allproc_lock); 1790 free(buf, M_RCTL); 1791 return (error); 1792 } 1793 1794 int 1795 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1796 { 1797 struct sbuf *sb; 1798 struct rctl_rule *filter; 1799 struct rctl_rule_link *link; 1800 char *inputstr, *buf; 1801 size_t bufsize; 1802 int error; 1803 1804 if (!racct_enable) 1805 return (ENOSYS); 1806 1807 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1808 if (error != 0) 1809 return (error); 1810 1811 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1812 if (error != 0) 1813 return (error); 1814 1815 sx_slock(&allproc_lock); 1816 error = rctl_string_to_rule(inputstr, &filter); 1817 free(inputstr, M_RCTL); 1818 if (error != 0) { 1819 sx_sunlock(&allproc_lock); 1820 return (error); 1821 } 1822 1823 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1824 rctl_rule_release(filter); 1825 sx_sunlock(&allproc_lock); 1826 return (EINVAL); 1827 } 1828 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1829 
rctl_rule_release(filter); 1830 sx_sunlock(&allproc_lock); 1831 return (EOPNOTSUPP); 1832 } 1833 if (filter->rr_subject.rs_proc == NULL) { 1834 rctl_rule_release(filter); 1835 sx_sunlock(&allproc_lock); 1836 return (EINVAL); 1837 } 1838 1839 bufsize = uap->outbuflen; 1840 if (bufsize > rctl_maxbufsize) { 1841 rctl_rule_release(filter); 1842 sx_sunlock(&allproc_lock); 1843 return (E2BIG); 1844 } 1845 1846 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1847 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1848 KASSERT(sb != NULL, ("sbuf_new failed")); 1849 1850 RACCT_LOCK(); 1851 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1852 rrl_next) { 1853 rctl_rule_to_sbuf(sb, link->rrl_rule); 1854 sbuf_printf(sb, ","); 1855 } 1856 RACCT_UNLOCK(); 1857 if (sbuf_error(sb) == ENOMEM) { 1858 error = ERANGE; 1859 sbuf_delete(sb); 1860 goto out; 1861 } 1862 1863 /* 1864 * Remove trailing ",". 1865 */ 1866 if (sbuf_len(sb) > 0) 1867 sbuf_setpos(sb, sbuf_len(sb) - 1); 1868 1869 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1870 out: 1871 rctl_rule_release(filter); 1872 sx_sunlock(&allproc_lock); 1873 free(buf, M_RCTL); 1874 return (error); 1875 } 1876 1877 int 1878 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1879 { 1880 struct rctl_rule *rule; 1881 char *inputstr; 1882 int error; 1883 1884 if (!racct_enable) 1885 return (ENOSYS); 1886 1887 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1888 if (error != 0) 1889 return (error); 1890 1891 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1892 if (error != 0) 1893 return (error); 1894 1895 sx_slock(&allproc_lock); 1896 error = rctl_string_to_rule(inputstr, &rule); 1897 free(inputstr, M_RCTL); 1898 if (error != 0) { 1899 sx_sunlock(&allproc_lock); 1900 return (error); 1901 } 1902 /* 1903 * The 'per' part of a rule is optional. 
1904 */ 1905 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1906 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1907 rule->rr_per = rule->rr_subject_type; 1908 1909 if (!rctl_rule_fully_specified(rule)) { 1910 error = EINVAL; 1911 goto out; 1912 } 1913 1914 error = rctl_rule_add(rule); 1915 1916 out: 1917 rctl_rule_release(rule); 1918 sx_sunlock(&allproc_lock); 1919 return (error); 1920 } 1921 1922 int 1923 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1924 { 1925 struct rctl_rule *filter; 1926 char *inputstr; 1927 int error; 1928 1929 if (!racct_enable) 1930 return (ENOSYS); 1931 1932 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1933 if (error != 0) 1934 return (error); 1935 1936 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1937 if (error != 0) 1938 return (error); 1939 1940 sx_slock(&allproc_lock); 1941 error = rctl_string_to_rule(inputstr, &filter); 1942 free(inputstr, M_RCTL); 1943 if (error != 0) { 1944 sx_sunlock(&allproc_lock); 1945 return (error); 1946 } 1947 1948 error = rctl_rule_remove(filter); 1949 rctl_rule_release(filter); 1950 sx_sunlock(&allproc_lock); 1951 1952 return (error); 1953 } 1954 1955 /* 1956 * Update RCTL rule list after credential change. 1957 */ 1958 void 1959 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1960 { 1961 LIST_HEAD(, rctl_rule_link) newrules; 1962 struct rctl_rule_link *link, *newlink; 1963 struct uidinfo *newuip; 1964 struct loginclass *newlc; 1965 struct prison_racct *newprr; 1966 int rulecnt, i; 1967 1968 if (!racct_enable) 1969 return; 1970 1971 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1972 1973 newuip = newcred->cr_ruidinfo; 1974 newlc = newcred->cr_loginclass; 1975 newprr = newcred->cr_prison->pr_prison_racct; 1976 1977 LIST_INIT(&newrules); 1978 1979 again: 1980 /* 1981 * First, count the rules that apply to the process with new 1982 * credentials. 
1983 */ 1984 rulecnt = 0; 1985 RACCT_LOCK(); 1986 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1987 if (link->rrl_rule->rr_subject_type == 1988 RCTL_SUBJECT_TYPE_PROCESS) 1989 rulecnt++; 1990 } 1991 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1992 rulecnt++; 1993 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1994 rulecnt++; 1995 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1996 rulecnt++; 1997 RACCT_UNLOCK(); 1998 1999 /* 2000 * Create temporary list. We've dropped the rctl_lock in order 2001 * to use M_WAITOK. 2002 */ 2003 for (i = 0; i < rulecnt; i++) { 2004 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2005 newlink->rrl_rule = NULL; 2006 newlink->rrl_exceeded = 0; 2007 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2008 } 2009 2010 newlink = LIST_FIRST(&newrules); 2011 2012 /* 2013 * Assign rules to the newly allocated list entries. 2014 */ 2015 RACCT_LOCK(); 2016 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2017 if (link->rrl_rule->rr_subject_type == 2018 RCTL_SUBJECT_TYPE_PROCESS) { 2019 if (newlink == NULL) 2020 goto goaround; 2021 rctl_rule_acquire(link->rrl_rule); 2022 newlink->rrl_rule = link->rrl_rule; 2023 newlink->rrl_exceeded = link->rrl_exceeded; 2024 newlink = LIST_NEXT(newlink, rrl_next); 2025 rulecnt--; 2026 } 2027 } 2028 2029 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2030 if (newlink == NULL) 2031 goto goaround; 2032 rctl_rule_acquire(link->rrl_rule); 2033 newlink->rrl_rule = link->rrl_rule; 2034 newlink->rrl_exceeded = link->rrl_exceeded; 2035 newlink = LIST_NEXT(newlink, rrl_next); 2036 rulecnt--; 2037 } 2038 2039 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2040 if (newlink == NULL) 2041 goto goaround; 2042 rctl_rule_acquire(link->rrl_rule); 2043 newlink->rrl_rule = link->rrl_rule; 2044 newlink->rrl_exceeded = link->rrl_exceeded; 2045 newlink = LIST_NEXT(newlink, rrl_next); 2046 rulecnt--; 2047 } 2048 2049 
LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2050 if (newlink == NULL) 2051 goto goaround; 2052 rctl_rule_acquire(link->rrl_rule); 2053 newlink->rrl_rule = link->rrl_rule; 2054 newlink->rrl_exceeded = link->rrl_exceeded; 2055 newlink = LIST_NEXT(newlink, rrl_next); 2056 rulecnt--; 2057 } 2058 2059 if (rulecnt == 0) { 2060 /* 2061 * Free the old rule list. 2062 */ 2063 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2064 link = LIST_FIRST(&p->p_racct->r_rule_links); 2065 LIST_REMOVE(link, rrl_next); 2066 rctl_rule_release(link->rrl_rule); 2067 uma_zfree(rctl_rule_link_zone, link); 2068 } 2069 2070 /* 2071 * Replace lists and we're done. 2072 * 2073 * XXX: Is there any way to switch list heads instead 2074 * of iterating here? 2075 */ 2076 while (!LIST_EMPTY(&newrules)) { 2077 newlink = LIST_FIRST(&newrules); 2078 LIST_REMOVE(newlink, rrl_next); 2079 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2080 newlink, rrl_next); 2081 } 2082 2083 RACCT_UNLOCK(); 2084 2085 return; 2086 } 2087 2088 goaround: 2089 RACCT_UNLOCK(); 2090 2091 /* 2092 * Rule list changed while we were not holding the rctl_lock. 2093 * Free the new list and try again. 2094 */ 2095 while (!LIST_EMPTY(&newrules)) { 2096 newlink = LIST_FIRST(&newrules); 2097 LIST_REMOVE(newlink, rrl_next); 2098 if (newlink->rrl_rule != NULL) 2099 rctl_rule_release(newlink->rrl_rule); 2100 uma_zfree(rctl_rule_link_zone, newlink); 2101 } 2102 2103 goto again; 2104 } 2105 2106 /* 2107 * Assign RCTL rules to the newly created process. 2108 */ 2109 int 2110 rctl_proc_fork(struct proc *parent, struct proc *child) 2111 { 2112 struct rctl_rule *rule; 2113 struct rctl_rule_link *link; 2114 int error; 2115 2116 ASSERT_RACCT_ENABLED(); 2117 RACCT_LOCK_ASSERT(); 2118 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2119 2120 LIST_INIT(&child->p_racct->r_rule_links); 2121 2122 /* 2123 * Go through limits applicable to the parent and assign them 2124 * to the child. 
Rules with 'process' subject have to be duplicated 2125 * in order to make their rr_subject point to the new process. 2126 */ 2127 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2128 if (link->rrl_rule->rr_subject_type == 2129 RCTL_SUBJECT_TYPE_PROCESS) { 2130 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2131 if (rule == NULL) 2132 goto fail; 2133 KASSERT(rule->rr_subject.rs_proc == parent, 2134 ("rule->rr_subject.rs_proc != parent")); 2135 rule->rr_subject.rs_proc = child; 2136 error = rctl_racct_add_rule_locked(child->p_racct, 2137 rule); 2138 rctl_rule_release(rule); 2139 if (error != 0) 2140 goto fail; 2141 } else { 2142 error = rctl_racct_add_rule_locked(child->p_racct, 2143 link->rrl_rule); 2144 if (error != 0) 2145 goto fail; 2146 } 2147 } 2148 2149 return (0); 2150 2151 fail: 2152 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2153 link = LIST_FIRST(&child->p_racct->r_rule_links); 2154 LIST_REMOVE(link, rrl_next); 2155 rctl_rule_release(link->rrl_rule); 2156 uma_zfree(rctl_rule_link_zone, link); 2157 } 2158 2159 return (EAGAIN); 2160 } 2161 2162 /* 2163 * Release rules attached to the racct. 
2164 */ 2165 void 2166 rctl_racct_release(struct racct *racct) 2167 { 2168 struct rctl_rule_link *link; 2169 2170 ASSERT_RACCT_ENABLED(); 2171 RACCT_LOCK_ASSERT(); 2172 2173 while (!LIST_EMPTY(&racct->r_rule_links)) { 2174 link = LIST_FIRST(&racct->r_rule_links); 2175 LIST_REMOVE(link, rrl_next); 2176 rctl_rule_release(link->rrl_rule); 2177 uma_zfree(rctl_rule_link_zone, link); 2178 } 2179 } 2180 2181 static void 2182 rctl_init(void) 2183 { 2184 2185 if (!racct_enable) 2186 return; 2187 2188 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2189 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2190 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2191 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2192 UMA_ALIGN_PTR, 0); 2193 2194 /* 2195 * Set default values, making sure not to overwrite the ones 2196 * fetched from tunables. Most of those could be set at the 2197 * declaration, except for the rctl_throttle_max - we cannot 2198 * set it there due to hz not being compile time constant. 
2199 */ 2200 if (rctl_throttle_min < 1) 2201 rctl_throttle_min = 1; 2202 if (rctl_throttle_max < rctl_throttle_min) 2203 rctl_throttle_max = 2 * hz; 2204 if (rctl_throttle_pct < 0) 2205 rctl_throttle_pct = 100; 2206 if (rctl_throttle_pct2 < 0) 2207 rctl_throttle_pct2 = 100; 2208 } 2209 2210 #else /* !RCTL */ 2211 2212 int 2213 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2214 { 2215 2216 return (ENOSYS); 2217 } 2218 2219 int 2220 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2221 { 2222 2223 return (ENOSYS); 2224 } 2225 2226 int 2227 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2228 { 2229 2230 return (ENOSYS); 2231 } 2232 2233 int 2234 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2235 { 2236 2237 return (ENOSYS); 2238 } 2239 2240 int 2241 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2242 { 2243 2244 return (ENOSYS); 2245 } 2246 2247 #endif /* !RCTL */ 2248