/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2010 The FreeBSD Foundation
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
29 */ 30 31 #ifdef RCTL 32 33 #include <sys/param.h> 34 #include <sys/devctl.h> 35 #include <sys/malloc.h> 36 #include <sys/queue.h> 37 #include <sys/refcount.h> 38 #include <sys/jail.h> 39 #include <sys/kernel.h> 40 #include <sys/limits.h> 41 #include <sys/loginclass.h> 42 #include <sys/malloc.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/racct.h> 46 #include <sys/rctl.h> 47 #include <sys/resourcevar.h> 48 #include <sys/sx.h> 49 #include <sys/sysproto.h> 50 #include <sys/systm.h> 51 #include <sys/types.h> 52 #include <sys/eventhandler.h> 53 #include <sys/lock.h> 54 #include <sys/mutex.h> 55 #include <sys/rwlock.h> 56 #include <sys/sbuf.h> 57 #include <sys/taskqueue.h> 58 #include <sys/tree.h> 59 #include <vm/uma.h> 60 61 #ifndef RACCT 62 #error "The RCTL option requires the RACCT option" 63 #endif 64 65 FEATURE(rctl, "Resource Limits"); 66 67 #define HRF_DEFAULT 0 68 #define HRF_DONT_INHERIT 1 69 #define HRF_DONT_ACCUMULATE 2 70 71 #define RCTL_MAX_INBUFSIZE 4 * 1024 72 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 73 #define RCTL_LOG_BUFSIZE 128 74 75 #define RCTL_PCPU_SHIFT (10 * 1000000) 76 77 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 78 static int rctl_log_rate_limit = 10; 79 static int rctl_devctl_rate_limit = 10; 80 81 /* 82 * Values below are initialized in rctl_init(). 
83 */ 84 static int rctl_throttle_min = -1; 85 static int rctl_throttle_max = -1; 86 static int rctl_throttle_pct = -1; 87 static int rctl_throttle_pct2 = -1; 88 89 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 90 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 91 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 92 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 93 94 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 95 "Resource Limits"); 96 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 97 &rctl_maxbufsize, 0, "Maximum output buffer size"); 98 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 99 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 101 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 102 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 103 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 104 &rctl_throttle_min_sysctl, "IU", 105 "Shortest throttling duration, in hz"); 106 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 107 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 108 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 109 &rctl_throttle_max_sysctl, "IU", 110 "Longest throttling duration, in hz"); 111 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 113 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 114 &rctl_throttle_pct_sysctl, "IU", 115 "Throttling penalty for process consumption, in percent"); 116 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 117 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 118 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 119 &rctl_throttle_pct2_sysctl, "IU", 120 "Throttling penalty for container consumption, in percent"); 121 
TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 122 123 /* 124 * 'rctl_rule_link' connects a rule with every racct it's related to. 125 * For example, rule 'user:X:openfiles:deny=N/process' is linked 126 * with uidinfo for user X, and to each process of that user. 127 */ 128 struct rctl_rule_link { 129 LIST_ENTRY(rctl_rule_link) rrl_next; 130 struct rctl_rule *rrl_rule; 131 int rrl_exceeded; 132 }; 133 134 struct dict { 135 const char *d_name; 136 int d_value; 137 }; 138 139 static struct dict subjectnames[] = { 140 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 141 { "user", RCTL_SUBJECT_TYPE_USER }, 142 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 143 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 144 { NULL, -1 }}; 145 146 static struct dict resourcenames[] = { 147 { "cputime", RACCT_CPU }, 148 { "datasize", RACCT_DATA }, 149 { "stacksize", RACCT_STACK }, 150 { "coredumpsize", RACCT_CORE }, 151 { "memoryuse", RACCT_RSS }, 152 { "memorylocked", RACCT_MEMLOCK }, 153 { "maxproc", RACCT_NPROC }, 154 { "openfiles", RACCT_NOFILE }, 155 { "vmemoryuse", RACCT_VMEM }, 156 { "pseudoterminals", RACCT_NPTS }, 157 { "swapuse", RACCT_SWAP }, 158 { "nthr", RACCT_NTHR }, 159 { "msgqqueued", RACCT_MSGQQUEUED }, 160 { "msgqsize", RACCT_MSGQSIZE }, 161 { "nmsgq", RACCT_NMSGQ }, 162 { "nsem", RACCT_NSEM }, 163 { "nsemop", RACCT_NSEMOP }, 164 { "nshm", RACCT_NSHM }, 165 { "shmsize", RACCT_SHMSIZE }, 166 { "wallclock", RACCT_WALLCLOCK }, 167 { "pcpu", RACCT_PCTCPU }, 168 { "readbps", RACCT_READBPS }, 169 { "writebps", RACCT_WRITEBPS }, 170 { "readiops", RACCT_READIOPS }, 171 { "writeiops", RACCT_WRITEIOPS }, 172 { NULL, -1 }}; 173 174 static struct dict actionnames[] = { 175 { "sighup", RCTL_ACTION_SIGHUP }, 176 { "sigint", RCTL_ACTION_SIGINT }, 177 { "sigquit", RCTL_ACTION_SIGQUIT }, 178 { "sigill", RCTL_ACTION_SIGILL }, 179 { "sigtrap", RCTL_ACTION_SIGTRAP }, 180 { "sigabrt", RCTL_ACTION_SIGABRT }, 181 { "sigemt", RCTL_ACTION_SIGEMT }, 182 { "sigfpe", RCTL_ACTION_SIGFPE }, 
183 { "sigkill", RCTL_ACTION_SIGKILL }, 184 { "sigbus", RCTL_ACTION_SIGBUS }, 185 { "sigsegv", RCTL_ACTION_SIGSEGV }, 186 { "sigsys", RCTL_ACTION_SIGSYS }, 187 { "sigpipe", RCTL_ACTION_SIGPIPE }, 188 { "sigalrm", RCTL_ACTION_SIGALRM }, 189 { "sigterm", RCTL_ACTION_SIGTERM }, 190 { "sigurg", RCTL_ACTION_SIGURG }, 191 { "sigstop", RCTL_ACTION_SIGSTOP }, 192 { "sigtstp", RCTL_ACTION_SIGTSTP }, 193 { "sigchld", RCTL_ACTION_SIGCHLD }, 194 { "sigttin", RCTL_ACTION_SIGTTIN }, 195 { "sigttou", RCTL_ACTION_SIGTTOU }, 196 { "sigio", RCTL_ACTION_SIGIO }, 197 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 198 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 199 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 200 { "sigprof", RCTL_ACTION_SIGPROF }, 201 { "sigwinch", RCTL_ACTION_SIGWINCH }, 202 { "siginfo", RCTL_ACTION_SIGINFO }, 203 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 204 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 205 { "sigthr", RCTL_ACTION_SIGTHR }, 206 { "deny", RCTL_ACTION_DENY }, 207 { "log", RCTL_ACTION_LOG }, 208 { "devctl", RCTL_ACTION_DEVCTL }, 209 { "throttle", RCTL_ACTION_THROTTLE }, 210 { NULL, -1 }}; 211 212 static void rctl_init(void); 213 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 214 215 static uma_zone_t rctl_rule_zone; 216 static uma_zone_t rctl_rule_link_zone; 217 218 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 219 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 220 221 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 222 223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 224 { 225 int error, val = rctl_throttle_min; 226 227 error = sysctl_handle_int(oidp, &val, 0, req); 228 if (error || !req->newptr) 229 return (error); 230 if (val < 1 || val > rctl_throttle_max) 231 return (EINVAL); 232 233 RACCT_LOCK(); 234 rctl_throttle_min = val; 235 RACCT_UNLOCK(); 236 237 return (0); 238 } 239 240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 241 { 242 int error, val = rctl_throttle_max; 243 244 error = 
sysctl_handle_int(oidp, &val, 0, req); 245 if (error || !req->newptr) 246 return (error); 247 if (val < rctl_throttle_min) 248 return (EINVAL); 249 250 RACCT_LOCK(); 251 rctl_throttle_max = val; 252 RACCT_UNLOCK(); 253 254 return (0); 255 } 256 257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 258 { 259 int error, val = rctl_throttle_pct; 260 261 error = sysctl_handle_int(oidp, &val, 0, req); 262 if (error || !req->newptr) 263 return (error); 264 if (val < 0) 265 return (EINVAL); 266 267 RACCT_LOCK(); 268 rctl_throttle_pct = val; 269 RACCT_UNLOCK(); 270 271 return (0); 272 } 273 274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 275 { 276 int error, val = rctl_throttle_pct2; 277 278 error = sysctl_handle_int(oidp, &val, 0, req); 279 if (error || !req->newptr) 280 return (error); 281 if (val < 0) 282 return (EINVAL); 283 284 RACCT_LOCK(); 285 rctl_throttle_pct2 = val; 286 RACCT_UNLOCK(); 287 288 return (0); 289 } 290 291 static const char * 292 rctl_subject_type_name(int subject) 293 { 294 int i; 295 296 for (i = 0; subjectnames[i].d_name != NULL; i++) { 297 if (subjectnames[i].d_value == subject) 298 return (subjectnames[i].d_name); 299 } 300 301 panic("rctl_subject_type_name: unknown subject type %d", subject); 302 } 303 304 static const char * 305 rctl_action_name(int action) 306 { 307 int i; 308 309 for (i = 0; actionnames[i].d_name != NULL; i++) { 310 if (actionnames[i].d_value == action) 311 return (actionnames[i].d_name); 312 } 313 314 panic("rctl_action_name: unknown action %d", action); 315 } 316 317 const char * 318 rctl_resource_name(int resource) 319 { 320 int i; 321 322 for (i = 0; resourcenames[i].d_name != NULL; i++) { 323 if (resourcenames[i].d_value == resource) 324 return (resourcenames[i].d_name); 325 } 326 327 panic("rctl_resource_name: unknown resource %d", resource); 328 } 329 330 static struct racct * 331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 332 { 333 struct ucred *cred = p->p_ucred; 
334 335 ASSERT_RACCT_ENABLED(); 336 RACCT_LOCK_ASSERT(); 337 338 switch (rule->rr_per) { 339 case RCTL_SUBJECT_TYPE_PROCESS: 340 return (p->p_racct); 341 case RCTL_SUBJECT_TYPE_USER: 342 return (cred->cr_ruidinfo->ui_racct); 343 case RCTL_SUBJECT_TYPE_LOGINCLASS: 344 return (cred->cr_loginclass->lc_racct); 345 case RCTL_SUBJECT_TYPE_JAIL: 346 return (cred->cr_prison->pr_prison_racct->prr_racct); 347 default: 348 panic("%s: unknown per %d", __func__, rule->rr_per); 349 } 350 } 351 352 /* 353 * Return the amount of resource that can be allocated by 'p' before 354 * hitting 'rule'. 355 */ 356 static int64_t 357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 358 { 359 const struct racct *racct; 360 int64_t available; 361 362 ASSERT_RACCT_ENABLED(); 363 RACCT_LOCK_ASSERT(); 364 365 racct = rctl_proc_rule_to_racct(p, rule); 366 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 367 368 return (available); 369 } 370 371 /* 372 * Called every second for proc, uidinfo, loginclass, and jail containers. 373 * If the limit isn't exceeded, it decreases the usage amount to zero. 374 * Otherwise, it decreases it by the value of the limit. This way 375 * resource consumption exceeding the limit "carries over" to the next 376 * period. 
377 */ 378 void 379 rctl_throttle_decay(struct racct *racct, int resource) 380 { 381 struct rctl_rule *rule; 382 struct rctl_rule_link *link; 383 int64_t minavailable; 384 385 ASSERT_RACCT_ENABLED(); 386 RACCT_LOCK_ASSERT(); 387 388 minavailable = INT64_MAX; 389 390 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 391 rule = link->rrl_rule; 392 393 if (rule->rr_resource != resource) 394 continue; 395 if (rule->rr_action != RCTL_ACTION_THROTTLE) 396 continue; 397 398 if (rule->rr_amount < minavailable) 399 minavailable = rule->rr_amount; 400 } 401 402 if (racct->r_resources[resource] < minavailable) { 403 racct->r_resources[resource] = 0; 404 } else { 405 /* 406 * Cap utilization counter at ten times the limit. Otherwise, 407 * if we changed the rule lowering the allowed amount, it could 408 * take unreasonably long time for the accumulated resource 409 * usage to drop. 410 */ 411 if (racct->r_resources[resource] > minavailable * 10) 412 racct->r_resources[resource] = minavailable * 10; 413 414 racct->r_resources[resource] -= minavailable; 415 } 416 } 417 418 /* 419 * Special version of rctl_get_available() for the %CPU resource. 420 * We slightly cheat here and return less than we normally would. 421 */ 422 int64_t 423 rctl_pcpu_available(const struct proc *p) { 424 struct rctl_rule *rule; 425 struct rctl_rule_link *link; 426 int64_t available, minavailable, limit; 427 428 ASSERT_RACCT_ENABLED(); 429 RACCT_LOCK_ASSERT(); 430 431 minavailable = INT64_MAX; 432 limit = 0; 433 434 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 435 rule = link->rrl_rule; 436 if (rule->rr_resource != RACCT_PCTCPU) 437 continue; 438 if (rule->rr_action != RCTL_ACTION_DENY) 439 continue; 440 available = rctl_available_resource(p, rule); 441 if (available < minavailable) { 442 minavailable = available; 443 limit = rule->rr_amount; 444 } 445 } 446 447 /* 448 * Return slightly less than actual value of the available 449 * %cpu resource. 
This makes %cpu throttling more aggressive 450 * and lets us act sooner than the limits are already exceeded. 451 */ 452 if (limit != 0) { 453 if (limit > 2 * RCTL_PCPU_SHIFT) 454 minavailable -= RCTL_PCPU_SHIFT; 455 else 456 minavailable -= (limit / 2); 457 } 458 459 return (minavailable); 460 } 461 462 static uint64_t 463 xadd(uint64_t a, uint64_t b) 464 { 465 uint64_t c; 466 467 c = a + b; 468 469 /* 470 * Detect overflow. 471 */ 472 if (c < a || c < b) 473 return (UINT64_MAX); 474 475 return (c); 476 } 477 478 static uint64_t 479 xmul(uint64_t a, uint64_t b) 480 { 481 482 if (b != 0 && a > UINT64_MAX / b) 483 return (UINT64_MAX); 484 485 return (a * b); 486 } 487 488 /* 489 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 490 * to what it keeps allocated now. Returns non-zero if the allocation should 491 * be denied, 0 otherwise. 492 */ 493 int 494 rctl_enforce(struct proc *p, int resource, uint64_t amount) 495 { 496 static struct timeval log_lasttime, devctl_lasttime; 497 static int log_curtime = 0, devctl_curtime = 0; 498 struct rctl_rule *rule; 499 struct rctl_rule_link *link; 500 struct sbuf sb; 501 char *buf; 502 int64_t available; 503 uint64_t sleep_ms, sleep_ratio; 504 int should_deny = 0; 505 506 ASSERT_RACCT_ENABLED(); 507 RACCT_LOCK_ASSERT(); 508 509 /* 510 * There may be more than one matching rule; go through all of them. 511 * Denial should be done last, after logging and sending signals. 512 */ 513 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 514 rule = link->rrl_rule; 515 if (rule->rr_resource != resource) 516 continue; 517 518 available = rctl_available_resource(p, rule); 519 if (available >= (int64_t)amount) { 520 link->rrl_exceeded = 0; 521 continue; 522 } 523 524 switch (rule->rr_action) { 525 case RCTL_ACTION_DENY: 526 should_deny = 1; 527 continue; 528 case RCTL_ACTION_LOG: 529 /* 530 * If rrl_exceeded != 0, it means we've already 531 * logged a warning for this process. 
532 */ 533 if (link->rrl_exceeded != 0) 534 continue; 535 536 /* 537 * If the process state is not fully initialized yet, 538 * we can't access most of the required fields, e.g. 539 * p->p_comm. This happens when called from fork1(). 540 * Ignore this rule for now; it will be processed just 541 * after fork, when called from racct_proc_fork_done(). 542 */ 543 if (p->p_state != PRS_NORMAL) 544 continue; 545 546 if (!ppsratecheck(&log_lasttime, &log_curtime, 547 rctl_log_rate_limit)) 548 continue; 549 550 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 551 if (buf == NULL) { 552 printf("rctl_enforce: out of memory\n"); 553 continue; 554 } 555 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 556 rctl_rule_to_sbuf(&sb, rule); 557 sbuf_finish(&sb); 558 printf("rctl: rule \"%s\" matched by pid %d " 559 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 560 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 561 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 562 sbuf_delete(&sb); 563 free(buf, M_RCTL); 564 link->rrl_exceeded = 1; 565 continue; 566 case RCTL_ACTION_DEVCTL: 567 if (link->rrl_exceeded != 0) 568 continue; 569 570 if (p->p_state != PRS_NORMAL) 571 continue; 572 573 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 574 rctl_devctl_rate_limit)) 575 continue; 576 577 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 578 if (buf == NULL) { 579 printf("rctl_enforce: out of memory\n"); 580 continue; 581 } 582 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 583 sbuf_cat(&sb, "rule="); 584 rctl_rule_to_sbuf(&sb, rule); 585 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 586 p->p_pid, p->p_ucred->cr_ruid, 587 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 588 sbuf_finish(&sb); 589 devctl_notify("RCTL", "rule", "matched", 590 sbuf_data(&sb)); 591 sbuf_delete(&sb); 592 free(buf, M_RCTL); 593 link->rrl_exceeded = 1; 594 continue; 595 case RCTL_ACTION_THROTTLE: 596 if (p->p_state != PRS_NORMAL) 597 continue; 598 599 if (rule->rr_amount == 0) { 600 racct_proc_throttle(p, 
rctl_throttle_max); 601 continue; 602 } 603 604 /* 605 * Make the process sleep for a fraction of second 606 * proportional to the ratio of process' resource 607 * utilization compared to the limit. The point is 608 * to penalize resource hogs: processes that consume 609 * more of the available resources sleep for longer. 610 * 611 * We're trying to defer division until the very end, 612 * to minimize the rounding effects. The following 613 * calculation could have been written in a clearer 614 * way like this: 615 * 616 * sleep_ms = hz * p->p_racct->r_resources[resource] / 617 * rule->rr_amount; 618 * sleep_ms *= rctl_throttle_pct / 100; 619 * if (sleep_ms < rctl_throttle_min) 620 * sleep_ms = rctl_throttle_min; 621 * 622 */ 623 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 624 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 625 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 626 sleep_ms = rctl_throttle_min * rule->rr_amount; 627 628 /* 629 * Multiply that by the ratio of the resource 630 * consumption for the container compared to the limit, 631 * squared. In other words, a process in a container 632 * that is two times over the limit will be throttled 633 * four times as much for hitting the same rule. The 634 * point is to penalize processes more if the container 635 * itself (eg certain UID or jail) is above the limit. 636 */ 637 if (available < 0) 638 sleep_ratio = -available / rule->rr_amount; 639 else 640 sleep_ratio = 0; 641 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 642 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 643 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 644 645 /* 646 * Finally the division. 
647 */ 648 sleep_ms /= rule->rr_amount; 649 650 if (sleep_ms > rctl_throttle_max) 651 sleep_ms = rctl_throttle_max; 652 #if 0 653 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 654 __func__, p->p_pid, p->p_comm, 655 p->p_racct->r_resources[resource], 656 rule->rr_amount, (uintmax_t)sleep_ms, 657 (uintmax_t)sleep_ratio, (intmax_t)available); 658 #endif 659 660 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 661 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 662 racct_proc_throttle(p, sleep_ms); 663 continue; 664 default: 665 if (link->rrl_exceeded != 0) 666 continue; 667 668 if (p->p_state != PRS_NORMAL) 669 continue; 670 671 KASSERT(rule->rr_action > 0 && 672 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 673 ("rctl_enforce: unknown action %d", 674 rule->rr_action)); 675 676 /* 677 * We're using the fact that RCTL_ACTION_SIG* values 678 * are equal to their counterparts from sys/signal.h. 679 */ 680 kern_psignal(p, rule->rr_action); 681 link->rrl_exceeded = 1; 682 continue; 683 } 684 } 685 686 if (should_deny) { 687 /* 688 * Return fake error code; the caller should change it 689 * into one proper for the situation - EFSIZ, ENOMEM etc. 690 */ 691 return (EDOOFUS); 692 } 693 694 return (0); 695 } 696 697 uint64_t 698 rctl_get_limit(struct proc *p, int resource) 699 { 700 struct rctl_rule *rule; 701 struct rctl_rule_link *link; 702 uint64_t amount = UINT64_MAX; 703 704 ASSERT_RACCT_ENABLED(); 705 RACCT_LOCK_ASSERT(); 706 707 /* 708 * There may be more than one matching rule; go through all of them. 709 * Denial should be done last, after logging and sending signals. 
710 */ 711 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 712 rule = link->rrl_rule; 713 if (rule->rr_resource != resource) 714 continue; 715 if (rule->rr_action != RCTL_ACTION_DENY) 716 continue; 717 if (rule->rr_amount < amount) 718 amount = rule->rr_amount; 719 } 720 721 return (amount); 722 } 723 724 uint64_t 725 rctl_get_available(struct proc *p, int resource) 726 { 727 struct rctl_rule *rule; 728 struct rctl_rule_link *link; 729 int64_t available, minavailable, allocated; 730 731 minavailable = INT64_MAX; 732 733 ASSERT_RACCT_ENABLED(); 734 RACCT_LOCK_ASSERT(); 735 736 /* 737 * There may be more than one matching rule; go through all of them. 738 * Denial should be done last, after logging and sending signals. 739 */ 740 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 741 rule = link->rrl_rule; 742 if (rule->rr_resource != resource) 743 continue; 744 if (rule->rr_action != RCTL_ACTION_DENY) 745 continue; 746 available = rctl_available_resource(p, rule); 747 if (available < minavailable) 748 minavailable = available; 749 } 750 751 /* 752 * XXX: Think about this _hard_. 
753 */ 754 allocated = p->p_racct->r_resources[resource]; 755 if (minavailable < INT64_MAX - allocated) 756 minavailable += allocated; 757 if (minavailable < 0) 758 minavailable = 0; 759 760 return (minavailable); 761 } 762 763 static int 764 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 765 { 766 767 ASSERT_RACCT_ENABLED(); 768 769 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 770 if (rule->rr_subject_type != filter->rr_subject_type) 771 return (0); 772 773 switch (filter->rr_subject_type) { 774 case RCTL_SUBJECT_TYPE_PROCESS: 775 if (filter->rr_subject.rs_proc != NULL && 776 rule->rr_subject.rs_proc != 777 filter->rr_subject.rs_proc) 778 return (0); 779 break; 780 case RCTL_SUBJECT_TYPE_USER: 781 if (filter->rr_subject.rs_uip != NULL && 782 rule->rr_subject.rs_uip != 783 filter->rr_subject.rs_uip) 784 return (0); 785 break; 786 case RCTL_SUBJECT_TYPE_LOGINCLASS: 787 if (filter->rr_subject.rs_loginclass != NULL && 788 rule->rr_subject.rs_loginclass != 789 filter->rr_subject.rs_loginclass) 790 return (0); 791 break; 792 case RCTL_SUBJECT_TYPE_JAIL: 793 if (filter->rr_subject.rs_prison_racct != NULL && 794 rule->rr_subject.rs_prison_racct != 795 filter->rr_subject.rs_prison_racct) 796 return (0); 797 break; 798 default: 799 panic("rctl_rule_matches: unknown subject type %d", 800 filter->rr_subject_type); 801 } 802 } 803 804 if (filter->rr_resource != RACCT_UNDEFINED) { 805 if (rule->rr_resource != filter->rr_resource) 806 return (0); 807 } 808 809 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 810 if (rule->rr_action != filter->rr_action) 811 return (0); 812 } 813 814 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 815 if (rule->rr_amount != filter->rr_amount) 816 return (0); 817 } 818 819 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 820 if (rule->rr_per != filter->rr_per) 821 return (0); 822 } 823 824 return (1); 825 } 826 827 static int 828 str2value(const char *str, int *value, struct dict *table) 829 
{ 830 int i; 831 832 if (value == NULL) 833 return (EINVAL); 834 835 for (i = 0; table[i].d_name != NULL; i++) { 836 if (strcasecmp(table[i].d_name, str) == 0) { 837 *value = table[i].d_value; 838 return (0); 839 } 840 } 841 842 return (EINVAL); 843 } 844 845 static int 846 str2id(const char *str, id_t *value) 847 { 848 char *end; 849 850 if (str == NULL) 851 return (EINVAL); 852 853 *value = strtoul(str, &end, 10); 854 if ((size_t)(end - str) != strlen(str)) 855 return (EINVAL); 856 857 return (0); 858 } 859 860 static int 861 str2int64(const char *str, int64_t *value) 862 { 863 char *end; 864 865 if (str == NULL) 866 return (EINVAL); 867 868 *value = strtoul(str, &end, 10); 869 if ((size_t)(end - str) != strlen(str)) 870 return (EINVAL); 871 872 if (*value < 0) 873 return (ERANGE); 874 875 return (0); 876 } 877 878 /* 879 * Connect the rule to the racct, increasing refcount for the rule. 880 */ 881 static void 882 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 883 { 884 struct rctl_rule_link *link; 885 886 ASSERT_RACCT_ENABLED(); 887 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 888 889 rctl_rule_acquire(rule); 890 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 891 link->rrl_rule = rule; 892 link->rrl_exceeded = 0; 893 894 RACCT_LOCK(); 895 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 896 RACCT_UNLOCK(); 897 } 898 899 static int 900 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 901 { 902 struct rctl_rule_link *link; 903 904 ASSERT_RACCT_ENABLED(); 905 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 906 RACCT_LOCK_ASSERT(); 907 908 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 909 if (link == NULL) 910 return (ENOMEM); 911 rctl_rule_acquire(rule); 912 link->rrl_rule = rule; 913 link->rrl_exceeded = 0; 914 915 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 916 917 return (0); 918 } 919 920 /* 921 * Remove limits for a rules matching the filter 
and release 922 * the refcounts for the rules, possibly freeing them. Returns 923 * the number of limit structures removed. 924 */ 925 static int 926 rctl_racct_remove_rules(struct racct *racct, 927 const struct rctl_rule *filter) 928 { 929 struct rctl_rule_link *link, *linktmp; 930 int removed = 0; 931 932 ASSERT_RACCT_ENABLED(); 933 RACCT_LOCK_ASSERT(); 934 935 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 936 if (!rctl_rule_matches(link->rrl_rule, filter)) 937 continue; 938 939 LIST_REMOVE(link, rrl_next); 940 rctl_rule_release(link->rrl_rule); 941 uma_zfree(rctl_rule_link_zone, link); 942 removed++; 943 } 944 return (removed); 945 } 946 947 static void 948 rctl_rule_acquire_subject(struct rctl_rule *rule) 949 { 950 951 ASSERT_RACCT_ENABLED(); 952 953 switch (rule->rr_subject_type) { 954 case RCTL_SUBJECT_TYPE_UNDEFINED: 955 case RCTL_SUBJECT_TYPE_PROCESS: 956 break; 957 case RCTL_SUBJECT_TYPE_JAIL: 958 if (rule->rr_subject.rs_prison_racct != NULL) 959 prison_racct_hold(rule->rr_subject.rs_prison_racct); 960 break; 961 case RCTL_SUBJECT_TYPE_USER: 962 if (rule->rr_subject.rs_uip != NULL) 963 uihold(rule->rr_subject.rs_uip); 964 break; 965 case RCTL_SUBJECT_TYPE_LOGINCLASS: 966 if (rule->rr_subject.rs_loginclass != NULL) 967 loginclass_hold(rule->rr_subject.rs_loginclass); 968 break; 969 default: 970 panic("rctl_rule_acquire_subject: unknown subject type %d", 971 rule->rr_subject_type); 972 } 973 } 974 975 static void 976 rctl_rule_release_subject(struct rctl_rule *rule) 977 { 978 979 ASSERT_RACCT_ENABLED(); 980 981 switch (rule->rr_subject_type) { 982 case RCTL_SUBJECT_TYPE_UNDEFINED: 983 case RCTL_SUBJECT_TYPE_PROCESS: 984 break; 985 case RCTL_SUBJECT_TYPE_JAIL: 986 if (rule->rr_subject.rs_prison_racct != NULL) 987 prison_racct_free(rule->rr_subject.rs_prison_racct); 988 break; 989 case RCTL_SUBJECT_TYPE_USER: 990 if (rule->rr_subject.rs_uip != NULL) 991 uifree(rule->rr_subject.rs_uip); 992 break; 993 case RCTL_SUBJECT_TYPE_LOGINCLASS: 994 
if (rule->rr_subject.rs_loginclass != NULL) 995 loginclass_free(rule->rr_subject.rs_loginclass); 996 break; 997 default: 998 panic("rctl_rule_release_subject: unknown subject type %d", 999 rule->rr_subject_type); 1000 } 1001 } 1002 1003 struct rctl_rule * 1004 rctl_rule_alloc(int flags) 1005 { 1006 struct rctl_rule *rule; 1007 1008 ASSERT_RACCT_ENABLED(); 1009 1010 rule = uma_zalloc(rctl_rule_zone, flags); 1011 if (rule == NULL) 1012 return (NULL); 1013 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1014 rule->rr_subject.rs_proc = NULL; 1015 rule->rr_subject.rs_uip = NULL; 1016 rule->rr_subject.rs_loginclass = NULL; 1017 rule->rr_subject.rs_prison_racct = NULL; 1018 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1019 rule->rr_resource = RACCT_UNDEFINED; 1020 rule->rr_action = RCTL_ACTION_UNDEFINED; 1021 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1022 refcount_init(&rule->rr_refcount, 1); 1023 1024 return (rule); 1025 } 1026 1027 struct rctl_rule * 1028 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1029 { 1030 struct rctl_rule *copy; 1031 1032 ASSERT_RACCT_ENABLED(); 1033 1034 copy = uma_zalloc(rctl_rule_zone, flags); 1035 if (copy == NULL) 1036 return (NULL); 1037 copy->rr_subject_type = rule->rr_subject_type; 1038 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1039 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1040 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1041 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1042 copy->rr_per = rule->rr_per; 1043 copy->rr_resource = rule->rr_resource; 1044 copy->rr_action = rule->rr_action; 1045 copy->rr_amount = rule->rr_amount; 1046 refcount_init(©->rr_refcount, 1); 1047 rctl_rule_acquire_subject(copy); 1048 1049 return (copy); 1050 } 1051 1052 void 1053 rctl_rule_acquire(struct rctl_rule *rule) 1054 { 1055 1056 ASSERT_RACCT_ENABLED(); 1057 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1058 1059 refcount_acquire(&rule->rr_refcount); 1060 } 1061 
1062 static void 1063 rctl_rule_free(void *context, int pending) 1064 { 1065 struct rctl_rule *rule; 1066 1067 rule = (struct rctl_rule *)context; 1068 1069 ASSERT_RACCT_ENABLED(); 1070 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1071 1072 /* 1073 * We don't need locking here; rule is guaranteed to be inaccessible. 1074 */ 1075 1076 rctl_rule_release_subject(rule); 1077 uma_zfree(rctl_rule_zone, rule); 1078 } 1079 1080 void 1081 rctl_rule_release(struct rctl_rule *rule) 1082 { 1083 1084 ASSERT_RACCT_ENABLED(); 1085 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1086 1087 if (refcount_release(&rule->rr_refcount)) { 1088 /* 1089 * rctl_rule_release() is often called when iterating 1090 * over all the uidinfo structures in the system, 1091 * holding uihashtbl_lock. Since rctl_rule_free() 1092 * might end up calling uifree(), this would lead 1093 * to lock recursion. Use taskqueue to avoid this. 1094 */ 1095 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1096 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1097 } 1098 } 1099 1100 static int 1101 rctl_rule_fully_specified(const struct rctl_rule *rule) 1102 { 1103 1104 ASSERT_RACCT_ENABLED(); 1105 1106 switch (rule->rr_subject_type) { 1107 case RCTL_SUBJECT_TYPE_UNDEFINED: 1108 return (0); 1109 case RCTL_SUBJECT_TYPE_PROCESS: 1110 if (rule->rr_subject.rs_proc == NULL) 1111 return (0); 1112 break; 1113 case RCTL_SUBJECT_TYPE_USER: 1114 if (rule->rr_subject.rs_uip == NULL) 1115 return (0); 1116 break; 1117 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1118 if (rule->rr_subject.rs_loginclass == NULL) 1119 return (0); 1120 break; 1121 case RCTL_SUBJECT_TYPE_JAIL: 1122 if (rule->rr_subject.rs_prison_racct == NULL) 1123 return (0); 1124 break; 1125 default: 1126 panic("rctl_rule_fully_specified: unknown subject type %d", 1127 rule->rr_subject_type); 1128 } 1129 if (rule->rr_resource == RACCT_UNDEFINED) 1130 return (0); 1131 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1132 return (0); 1133 if 
(rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1134 return (0); 1135 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1136 return (0); 1137 1138 return (1); 1139 } 1140 1141 static int 1142 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1143 { 1144 struct rctl_rule *rule; 1145 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1146 *amountstr, *perstr; 1147 id_t id; 1148 int error = 0; 1149 1150 ASSERT_RACCT_ENABLED(); 1151 1152 rule = rctl_rule_alloc(M_WAITOK); 1153 1154 subjectstr = strsep(&rulestr, ":"); 1155 subject_idstr = strsep(&rulestr, ":"); 1156 resourcestr = strsep(&rulestr, ":"); 1157 actionstr = strsep(&rulestr, "=/"); 1158 amountstr = strsep(&rulestr, "/"); 1159 perstr = rulestr; 1160 1161 if (subjectstr == NULL || subjectstr[0] == '\0') 1162 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1163 else { 1164 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1165 if (error != 0) 1166 goto out; 1167 } 1168 1169 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1170 rule->rr_subject.rs_proc = NULL; 1171 rule->rr_subject.rs_uip = NULL; 1172 rule->rr_subject.rs_loginclass = NULL; 1173 rule->rr_subject.rs_prison_racct = NULL; 1174 } else { 1175 switch (rule->rr_subject_type) { 1176 case RCTL_SUBJECT_TYPE_UNDEFINED: 1177 error = EINVAL; 1178 goto out; 1179 case RCTL_SUBJECT_TYPE_PROCESS: 1180 error = str2id(subject_idstr, &id); 1181 if (error != 0) 1182 goto out; 1183 sx_assert(&allproc_lock, SA_LOCKED); 1184 rule->rr_subject.rs_proc = pfind(id); 1185 if (rule->rr_subject.rs_proc == NULL) { 1186 error = ESRCH; 1187 goto out; 1188 } 1189 PROC_UNLOCK(rule->rr_subject.rs_proc); 1190 break; 1191 case RCTL_SUBJECT_TYPE_USER: 1192 error = str2id(subject_idstr, &id); 1193 if (error != 0) 1194 goto out; 1195 rule->rr_subject.rs_uip = uifind(id); 1196 break; 1197 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1198 rule->rr_subject.rs_loginclass = 1199 loginclass_find(subject_idstr); 1200 if (rule->rr_subject.rs_loginclass == NULL) 
{ 1201 error = ENAMETOOLONG; 1202 goto out; 1203 } 1204 break; 1205 case RCTL_SUBJECT_TYPE_JAIL: 1206 rule->rr_subject.rs_prison_racct = 1207 prison_racct_find(subject_idstr); 1208 if (rule->rr_subject.rs_prison_racct == NULL) { 1209 error = ENAMETOOLONG; 1210 goto out; 1211 } 1212 break; 1213 default: 1214 panic("rctl_string_to_rule: unknown subject type %d", 1215 rule->rr_subject_type); 1216 } 1217 } 1218 1219 if (resourcestr == NULL || resourcestr[0] == '\0') 1220 rule->rr_resource = RACCT_UNDEFINED; 1221 else { 1222 error = str2value(resourcestr, &rule->rr_resource, 1223 resourcenames); 1224 if (error != 0) 1225 goto out; 1226 } 1227 1228 if (actionstr == NULL || actionstr[0] == '\0') 1229 rule->rr_action = RCTL_ACTION_UNDEFINED; 1230 else { 1231 error = str2value(actionstr, &rule->rr_action, actionnames); 1232 if (error != 0) 1233 goto out; 1234 } 1235 1236 if (amountstr == NULL || amountstr[0] == '\0') 1237 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1238 else { 1239 error = str2int64(amountstr, &rule->rr_amount); 1240 if (error != 0) 1241 goto out; 1242 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1243 if (rule->rr_amount > INT64_MAX / 1000000) { 1244 error = ERANGE; 1245 goto out; 1246 } 1247 rule->rr_amount *= 1000000; 1248 } 1249 } 1250 1251 if (perstr == NULL || perstr[0] == '\0') 1252 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1253 else { 1254 error = str2value(perstr, &rule->rr_per, subjectnames); 1255 if (error != 0) 1256 goto out; 1257 } 1258 1259 out: 1260 if (error == 0) 1261 *rulep = rule; 1262 else 1263 rctl_rule_release(rule); 1264 1265 return (error); 1266 } 1267 1268 /* 1269 * Link a rule with all the subjects it applies to. 
1270 */ 1271 int 1272 rctl_rule_add(struct rctl_rule *rule) 1273 { 1274 struct proc *p; 1275 struct ucred *cred; 1276 struct uidinfo *uip; 1277 struct prison *pr; 1278 struct prison_racct *prr; 1279 struct loginclass *lc; 1280 struct rctl_rule *rule2; 1281 int match; 1282 1283 ASSERT_RACCT_ENABLED(); 1284 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1285 1286 /* 1287 * Some rules just don't make sense, like "deny" rule for an undeniable 1288 * resource. The exception are the RSS and %CPU resources - they are 1289 * not deniable in the racct sense, but the limit is enforced in 1290 * a different way. 1291 */ 1292 if (rule->rr_action == RCTL_ACTION_DENY && 1293 !RACCT_IS_DENIABLE(rule->rr_resource) && 1294 rule->rr_resource != RACCT_RSS && 1295 rule->rr_resource != RACCT_PCTCPU) { 1296 return (EOPNOTSUPP); 1297 } 1298 1299 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1300 !RACCT_IS_DECAYING(rule->rr_resource)) { 1301 return (EOPNOTSUPP); 1302 } 1303 1304 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1305 rule->rr_resource == RACCT_PCTCPU) { 1306 return (EOPNOTSUPP); 1307 } 1308 1309 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1310 RACCT_IS_SLOPPY(rule->rr_resource)) { 1311 return (EOPNOTSUPP); 1312 } 1313 1314 /* 1315 * Make sure there are no duplicated rules. Also, for the "deny" 1316 * rules, remove ones differing only by "amount". 1317 */ 1318 if (rule->rr_action == RCTL_ACTION_DENY) { 1319 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1320 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1321 rctl_rule_remove(rule2); 1322 rctl_rule_release(rule2); 1323 } else 1324 rctl_rule_remove(rule); 1325 1326 switch (rule->rr_subject_type) { 1327 case RCTL_SUBJECT_TYPE_PROCESS: 1328 p = rule->rr_subject.rs_proc; 1329 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1330 1331 rctl_racct_add_rule(p->p_racct, rule); 1332 /* 1333 * In case of per-process rule, we don't have anything more 1334 * to do. 
1335 */ 1336 return (0); 1337 1338 case RCTL_SUBJECT_TYPE_USER: 1339 uip = rule->rr_subject.rs_uip; 1340 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1341 rctl_racct_add_rule(uip->ui_racct, rule); 1342 break; 1343 1344 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1345 lc = rule->rr_subject.rs_loginclass; 1346 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1347 rctl_racct_add_rule(lc->lc_racct, rule); 1348 break; 1349 1350 case RCTL_SUBJECT_TYPE_JAIL: 1351 prr = rule->rr_subject.rs_prison_racct; 1352 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1353 rctl_racct_add_rule(prr->prr_racct, rule); 1354 break; 1355 1356 default: 1357 panic("rctl_rule_add: unknown subject type %d", 1358 rule->rr_subject_type); 1359 } 1360 1361 /* 1362 * Now go through all the processes and add the new rule to the ones 1363 * it applies to. 1364 */ 1365 sx_assert(&allproc_lock, SA_LOCKED); 1366 FOREACH_PROC_IN_SYSTEM(p) { 1367 cred = p->p_ucred; 1368 switch (rule->rr_subject_type) { 1369 case RCTL_SUBJECT_TYPE_USER: 1370 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1371 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1372 break; 1373 continue; 1374 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1375 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1376 break; 1377 continue; 1378 case RCTL_SUBJECT_TYPE_JAIL: 1379 match = 0; 1380 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1381 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1382 match = 1; 1383 break; 1384 } 1385 } 1386 if (match) 1387 break; 1388 continue; 1389 default: 1390 panic("rctl_rule_add: unknown subject type %d", 1391 rule->rr_subject_type); 1392 } 1393 1394 rctl_racct_add_rule(p->p_racct, rule); 1395 } 1396 1397 return (0); 1398 } 1399 1400 static void 1401 rctl_rule_pre_callback(void) 1402 { 1403 1404 RACCT_LOCK(); 1405 } 1406 1407 static void 1408 rctl_rule_post_callback(void) 1409 { 1410 1411 RACCT_UNLOCK(); 1412 } 1413 1414 static void 1415 rctl_rule_remove_callback(struct racct 
*racct, void *arg2, void *arg3) 1416 { 1417 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1418 int found = 0; 1419 1420 ASSERT_RACCT_ENABLED(); 1421 RACCT_LOCK_ASSERT(); 1422 1423 found += rctl_racct_remove_rules(racct, filter); 1424 1425 *((int *)arg3) += found; 1426 } 1427 1428 /* 1429 * Remove all rules that match the filter. 1430 */ 1431 int 1432 rctl_rule_remove(struct rctl_rule *filter) 1433 { 1434 struct proc *p; 1435 int found = 0; 1436 1437 ASSERT_RACCT_ENABLED(); 1438 1439 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1440 filter->rr_subject.rs_proc != NULL) { 1441 p = filter->rr_subject.rs_proc; 1442 RACCT_LOCK(); 1443 found = rctl_racct_remove_rules(p->p_racct, filter); 1444 RACCT_UNLOCK(); 1445 if (found) 1446 return (0); 1447 return (ESRCH); 1448 } 1449 1450 loginclass_racct_foreach(rctl_rule_remove_callback, 1451 rctl_rule_pre_callback, rctl_rule_post_callback, 1452 filter, (void *)&found); 1453 ui_racct_foreach(rctl_rule_remove_callback, 1454 rctl_rule_pre_callback, rctl_rule_post_callback, 1455 filter, (void *)&found); 1456 prison_racct_foreach(rctl_rule_remove_callback, 1457 rctl_rule_pre_callback, rctl_rule_post_callback, 1458 filter, (void *)&found); 1459 1460 sx_assert(&allproc_lock, SA_LOCKED); 1461 RACCT_LOCK(); 1462 FOREACH_PROC_IN_SYSTEM(p) { 1463 found += rctl_racct_remove_rules(p->p_racct, filter); 1464 } 1465 RACCT_UNLOCK(); 1466 1467 if (found) 1468 return (0); 1469 return (ESRCH); 1470 } 1471 1472 /* 1473 * Appends a rule to the sbuf. 
1474 */ 1475 static void 1476 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1477 { 1478 int64_t amount; 1479 1480 ASSERT_RACCT_ENABLED(); 1481 1482 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1483 1484 switch (rule->rr_subject_type) { 1485 case RCTL_SUBJECT_TYPE_PROCESS: 1486 if (rule->rr_subject.rs_proc == NULL) 1487 sbuf_putc(sb, ':'); 1488 else 1489 sbuf_printf(sb, "%d:", 1490 rule->rr_subject.rs_proc->p_pid); 1491 break; 1492 case RCTL_SUBJECT_TYPE_USER: 1493 if (rule->rr_subject.rs_uip == NULL) 1494 sbuf_putc(sb, ':'); 1495 else 1496 sbuf_printf(sb, "%d:", 1497 rule->rr_subject.rs_uip->ui_uid); 1498 break; 1499 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1500 if (rule->rr_subject.rs_loginclass == NULL) 1501 sbuf_putc(sb, ':'); 1502 else 1503 sbuf_printf(sb, "%s:", 1504 rule->rr_subject.rs_loginclass->lc_name); 1505 break; 1506 case RCTL_SUBJECT_TYPE_JAIL: 1507 if (rule->rr_subject.rs_prison_racct == NULL) 1508 sbuf_putc(sb, ':'); 1509 else 1510 sbuf_printf(sb, "%s:", 1511 rule->rr_subject.rs_prison_racct->prr_name); 1512 break; 1513 default: 1514 panic("rctl_rule_to_sbuf: unknown subject type %d", 1515 rule->rr_subject_type); 1516 } 1517 1518 amount = rule->rr_amount; 1519 if (amount != RCTL_AMOUNT_UNDEFINED && 1520 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1521 amount /= 1000000; 1522 1523 sbuf_printf(sb, "%s:%s=%jd", 1524 rctl_resource_name(rule->rr_resource), 1525 rctl_action_name(rule->rr_action), 1526 amount); 1527 1528 if (rule->rr_per != rule->rr_subject_type) 1529 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1530 } 1531 1532 /* 1533 * Routine used by RCTL syscalls to read in input string. 
1534 */ 1535 static int 1536 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1537 { 1538 char *str; 1539 int error; 1540 1541 ASSERT_RACCT_ENABLED(); 1542 1543 if (inbuflen <= 0) 1544 return (EINVAL); 1545 if (inbuflen > RCTL_MAX_INBUFSIZE) 1546 return (E2BIG); 1547 1548 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1549 error = copyinstr(inbufp, str, inbuflen, NULL); 1550 if (error != 0) { 1551 free(str, M_RCTL); 1552 return (error); 1553 } 1554 1555 *inputstr = str; 1556 1557 return (0); 1558 } 1559 1560 /* 1561 * Routine used by RCTL syscalls to write out output string. 1562 */ 1563 static int 1564 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1565 { 1566 int error; 1567 1568 ASSERT_RACCT_ENABLED(); 1569 1570 if (outputsbuf == NULL) 1571 return (0); 1572 1573 sbuf_finish(outputsbuf); 1574 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1575 sbuf_delete(outputsbuf); 1576 return (ERANGE); 1577 } 1578 error = copyout(sbuf_data(outputsbuf), outbufp, 1579 sbuf_len(outputsbuf) + 1); 1580 sbuf_delete(outputsbuf); 1581 return (error); 1582 } 1583 1584 static struct sbuf * 1585 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1586 { 1587 struct sbuf *sb; 1588 int64_t amount; 1589 int i; 1590 1591 ASSERT_RACCT_ENABLED(); 1592 1593 sb = sbuf_new_auto(); 1594 for (i = 0; i <= RACCT_MAX; i++) { 1595 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1596 continue; 1597 RACCT_LOCK(); 1598 amount = racct->r_resources[i]; 1599 RACCT_UNLOCK(); 1600 if (RACCT_IS_IN_MILLIONS(i)) 1601 amount /= 1000000; 1602 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1603 } 1604 sbuf_setpos(sb, sbuf_len(sb) - 1); 1605 return (sb); 1606 } 1607 1608 int 1609 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1610 { 1611 struct rctl_rule *filter; 1612 struct sbuf *outputsbuf = NULL; 1613 struct proc *p; 1614 struct uidinfo *uip; 1615 struct loginclass *lc; 1616 struct prison_racct *prr; 1617 char *inputstr; 1618 int error; 1619 
1620 if (!racct_enable) 1621 return (ENOSYS); 1622 1623 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1624 if (error != 0) 1625 return (error); 1626 1627 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1628 if (error != 0) 1629 return (error); 1630 1631 sx_slock(&allproc_lock); 1632 error = rctl_string_to_rule(inputstr, &filter); 1633 free(inputstr, M_RCTL); 1634 if (error != 0) { 1635 sx_sunlock(&allproc_lock); 1636 return (error); 1637 } 1638 1639 switch (filter->rr_subject_type) { 1640 case RCTL_SUBJECT_TYPE_PROCESS: 1641 p = filter->rr_subject.rs_proc; 1642 if (p == NULL) { 1643 error = EINVAL; 1644 goto out; 1645 } 1646 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1647 break; 1648 case RCTL_SUBJECT_TYPE_USER: 1649 uip = filter->rr_subject.rs_uip; 1650 if (uip == NULL) { 1651 error = EINVAL; 1652 goto out; 1653 } 1654 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1655 break; 1656 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1657 lc = filter->rr_subject.rs_loginclass; 1658 if (lc == NULL) { 1659 error = EINVAL; 1660 goto out; 1661 } 1662 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1663 break; 1664 case RCTL_SUBJECT_TYPE_JAIL: 1665 prr = filter->rr_subject.rs_prison_racct; 1666 if (prr == NULL) { 1667 error = EINVAL; 1668 goto out; 1669 } 1670 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1671 break; 1672 default: 1673 error = EINVAL; 1674 } 1675 out: 1676 rctl_rule_release(filter); 1677 sx_sunlock(&allproc_lock); 1678 if (error != 0) 1679 return (error); 1680 1681 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1682 1683 return (error); 1684 } 1685 1686 static void 1687 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1688 { 1689 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1690 struct rctl_rule_link *link; 1691 struct sbuf *sb = (struct sbuf *)arg3; 1692 1693 ASSERT_RACCT_ENABLED(); 1694 RACCT_LOCK_ASSERT(); 1695 1696 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1697 if 
(!rctl_rule_matches(link->rrl_rule, filter)) 1698 continue; 1699 rctl_rule_to_sbuf(sb, link->rrl_rule); 1700 sbuf_putc(sb, ','); 1701 } 1702 } 1703 1704 int 1705 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1706 { 1707 struct sbuf *sb; 1708 struct rctl_rule *filter; 1709 struct rctl_rule_link *link; 1710 struct proc *p; 1711 char *inputstr, *buf; 1712 size_t bufsize; 1713 int error; 1714 1715 if (!racct_enable) 1716 return (ENOSYS); 1717 1718 error = priv_check(td, PRIV_RCTL_GET_RULES); 1719 if (error != 0) 1720 return (error); 1721 1722 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1723 if (error != 0) 1724 return (error); 1725 1726 sx_slock(&allproc_lock); 1727 error = rctl_string_to_rule(inputstr, &filter); 1728 free(inputstr, M_RCTL); 1729 if (error != 0) { 1730 sx_sunlock(&allproc_lock); 1731 return (error); 1732 } 1733 1734 bufsize = uap->outbuflen; 1735 if (bufsize > rctl_maxbufsize) { 1736 sx_sunlock(&allproc_lock); 1737 return (E2BIG); 1738 } 1739 1740 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1741 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1742 KASSERT(sb != NULL, ("sbuf_new failed")); 1743 1744 FOREACH_PROC_IN_SYSTEM(p) { 1745 RACCT_LOCK(); 1746 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1747 /* 1748 * Non-process rules will be added to the buffer later. 1749 * Adding them here would result in duplicated output. 
1750 */ 1751 if (link->rrl_rule->rr_subject_type != 1752 RCTL_SUBJECT_TYPE_PROCESS) 1753 continue; 1754 if (!rctl_rule_matches(link->rrl_rule, filter)) 1755 continue; 1756 rctl_rule_to_sbuf(sb, link->rrl_rule); 1757 sbuf_putc(sb, ','); 1758 } 1759 RACCT_UNLOCK(); 1760 } 1761 1762 loginclass_racct_foreach(rctl_get_rules_callback, 1763 rctl_rule_pre_callback, rctl_rule_post_callback, 1764 filter, sb); 1765 ui_racct_foreach(rctl_get_rules_callback, 1766 rctl_rule_pre_callback, rctl_rule_post_callback, 1767 filter, sb); 1768 prison_racct_foreach(rctl_get_rules_callback, 1769 rctl_rule_pre_callback, rctl_rule_post_callback, 1770 filter, sb); 1771 if (sbuf_error(sb) == ENOMEM) { 1772 error = ERANGE; 1773 goto out; 1774 } 1775 1776 /* 1777 * Remove trailing ",". 1778 */ 1779 if (sbuf_len(sb) > 0) 1780 sbuf_setpos(sb, sbuf_len(sb) - 1); 1781 1782 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1783 out: 1784 rctl_rule_release(filter); 1785 sx_sunlock(&allproc_lock); 1786 free(buf, M_RCTL); 1787 return (error); 1788 } 1789 1790 int 1791 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1792 { 1793 struct sbuf *sb; 1794 struct rctl_rule *filter; 1795 struct rctl_rule_link *link; 1796 char *inputstr, *buf; 1797 size_t bufsize; 1798 int error; 1799 1800 if (!racct_enable) 1801 return (ENOSYS); 1802 1803 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1804 if (error != 0) 1805 return (error); 1806 1807 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1808 if (error != 0) 1809 return (error); 1810 1811 sx_slock(&allproc_lock); 1812 error = rctl_string_to_rule(inputstr, &filter); 1813 free(inputstr, M_RCTL); 1814 if (error != 0) { 1815 sx_sunlock(&allproc_lock); 1816 return (error); 1817 } 1818 1819 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1820 rctl_rule_release(filter); 1821 sx_sunlock(&allproc_lock); 1822 return (EINVAL); 1823 } 1824 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1825 
rctl_rule_release(filter); 1826 sx_sunlock(&allproc_lock); 1827 return (EOPNOTSUPP); 1828 } 1829 if (filter->rr_subject.rs_proc == NULL) { 1830 rctl_rule_release(filter); 1831 sx_sunlock(&allproc_lock); 1832 return (EINVAL); 1833 } 1834 1835 bufsize = uap->outbuflen; 1836 if (bufsize > rctl_maxbufsize) { 1837 rctl_rule_release(filter); 1838 sx_sunlock(&allproc_lock); 1839 return (E2BIG); 1840 } 1841 1842 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1843 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1844 KASSERT(sb != NULL, ("sbuf_new failed")); 1845 1846 RACCT_LOCK(); 1847 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1848 rrl_next) { 1849 rctl_rule_to_sbuf(sb, link->rrl_rule); 1850 sbuf_putc(sb, ','); 1851 } 1852 RACCT_UNLOCK(); 1853 if (sbuf_error(sb) == ENOMEM) { 1854 error = ERANGE; 1855 sbuf_delete(sb); 1856 goto out; 1857 } 1858 1859 /* 1860 * Remove trailing ",". 1861 */ 1862 if (sbuf_len(sb) > 0) 1863 sbuf_setpos(sb, sbuf_len(sb) - 1); 1864 1865 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1866 out: 1867 rctl_rule_release(filter); 1868 sx_sunlock(&allproc_lock); 1869 free(buf, M_RCTL); 1870 return (error); 1871 } 1872 1873 int 1874 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1875 { 1876 struct rctl_rule *rule; 1877 char *inputstr; 1878 int error; 1879 1880 if (!racct_enable) 1881 return (ENOSYS); 1882 1883 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1884 if (error != 0) 1885 return (error); 1886 1887 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1888 if (error != 0) 1889 return (error); 1890 1891 sx_slock(&allproc_lock); 1892 error = rctl_string_to_rule(inputstr, &rule); 1893 free(inputstr, M_RCTL); 1894 if (error != 0) { 1895 sx_sunlock(&allproc_lock); 1896 return (error); 1897 } 1898 /* 1899 * The 'per' part of a rule is optional. 
1900 */ 1901 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1902 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1903 rule->rr_per = rule->rr_subject_type; 1904 1905 if (!rctl_rule_fully_specified(rule)) { 1906 error = EINVAL; 1907 goto out; 1908 } 1909 1910 error = rctl_rule_add(rule); 1911 1912 out: 1913 rctl_rule_release(rule); 1914 sx_sunlock(&allproc_lock); 1915 return (error); 1916 } 1917 1918 int 1919 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1920 { 1921 struct rctl_rule *filter; 1922 char *inputstr; 1923 int error; 1924 1925 if (!racct_enable) 1926 return (ENOSYS); 1927 1928 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1929 if (error != 0) 1930 return (error); 1931 1932 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1933 if (error != 0) 1934 return (error); 1935 1936 sx_slock(&allproc_lock); 1937 error = rctl_string_to_rule(inputstr, &filter); 1938 free(inputstr, M_RCTL); 1939 if (error != 0) { 1940 sx_sunlock(&allproc_lock); 1941 return (error); 1942 } 1943 1944 error = rctl_rule_remove(filter); 1945 rctl_rule_release(filter); 1946 sx_sunlock(&allproc_lock); 1947 1948 return (error); 1949 } 1950 1951 /* 1952 * Update RCTL rule list after credential change. 1953 */ 1954 void 1955 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1956 { 1957 LIST_HEAD(, rctl_rule_link) newrules; 1958 struct rctl_rule_link *link, *newlink; 1959 struct uidinfo *newuip; 1960 struct loginclass *newlc; 1961 struct prison_racct *newprr; 1962 int rulecnt, i; 1963 1964 if (!racct_enable) 1965 return; 1966 1967 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1968 1969 newuip = newcred->cr_ruidinfo; 1970 newlc = newcred->cr_loginclass; 1971 newprr = newcred->cr_prison->pr_prison_racct; 1972 1973 LIST_INIT(&newrules); 1974 1975 again: 1976 /* 1977 * First, count the rules that apply to the process with new 1978 * credentials. 
1979 */ 1980 rulecnt = 0; 1981 RACCT_LOCK(); 1982 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1983 if (link->rrl_rule->rr_subject_type == 1984 RCTL_SUBJECT_TYPE_PROCESS) 1985 rulecnt++; 1986 } 1987 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1988 rulecnt++; 1989 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1990 rulecnt++; 1991 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1992 rulecnt++; 1993 RACCT_UNLOCK(); 1994 1995 /* 1996 * Create temporary list. We've dropped the rctl_lock in order 1997 * to use M_WAITOK. 1998 */ 1999 for (i = 0; i < rulecnt; i++) { 2000 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2001 newlink->rrl_rule = NULL; 2002 newlink->rrl_exceeded = 0; 2003 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2004 } 2005 2006 newlink = LIST_FIRST(&newrules); 2007 2008 /* 2009 * Assign rules to the newly allocated list entries. 2010 */ 2011 RACCT_LOCK(); 2012 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2013 if (link->rrl_rule->rr_subject_type == 2014 RCTL_SUBJECT_TYPE_PROCESS) { 2015 if (newlink == NULL) 2016 goto goaround; 2017 rctl_rule_acquire(link->rrl_rule); 2018 newlink->rrl_rule = link->rrl_rule; 2019 newlink->rrl_exceeded = link->rrl_exceeded; 2020 newlink = LIST_NEXT(newlink, rrl_next); 2021 rulecnt--; 2022 } 2023 } 2024 2025 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2026 if (newlink == NULL) 2027 goto goaround; 2028 rctl_rule_acquire(link->rrl_rule); 2029 newlink->rrl_rule = link->rrl_rule; 2030 newlink->rrl_exceeded = link->rrl_exceeded; 2031 newlink = LIST_NEXT(newlink, rrl_next); 2032 rulecnt--; 2033 } 2034 2035 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2036 if (newlink == NULL) 2037 goto goaround; 2038 rctl_rule_acquire(link->rrl_rule); 2039 newlink->rrl_rule = link->rrl_rule; 2040 newlink->rrl_exceeded = link->rrl_exceeded; 2041 newlink = LIST_NEXT(newlink, rrl_next); 2042 rulecnt--; 2043 } 2044 2045 
LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2046 if (newlink == NULL) 2047 goto goaround; 2048 rctl_rule_acquire(link->rrl_rule); 2049 newlink->rrl_rule = link->rrl_rule; 2050 newlink->rrl_exceeded = link->rrl_exceeded; 2051 newlink = LIST_NEXT(newlink, rrl_next); 2052 rulecnt--; 2053 } 2054 2055 if (rulecnt == 0) { 2056 /* 2057 * Free the old rule list. 2058 */ 2059 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2060 link = LIST_FIRST(&p->p_racct->r_rule_links); 2061 LIST_REMOVE(link, rrl_next); 2062 rctl_rule_release(link->rrl_rule); 2063 uma_zfree(rctl_rule_link_zone, link); 2064 } 2065 2066 /* 2067 * Replace lists and we're done. 2068 * 2069 * XXX: Is there any way to switch list heads instead 2070 * of iterating here? 2071 */ 2072 while (!LIST_EMPTY(&newrules)) { 2073 newlink = LIST_FIRST(&newrules); 2074 LIST_REMOVE(newlink, rrl_next); 2075 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2076 newlink, rrl_next); 2077 } 2078 2079 RACCT_UNLOCK(); 2080 2081 return; 2082 } 2083 2084 goaround: 2085 RACCT_UNLOCK(); 2086 2087 /* 2088 * Rule list changed while we were not holding the rctl_lock. 2089 * Free the new list and try again. 2090 */ 2091 while (!LIST_EMPTY(&newrules)) { 2092 newlink = LIST_FIRST(&newrules); 2093 LIST_REMOVE(newlink, rrl_next); 2094 if (newlink->rrl_rule != NULL) 2095 rctl_rule_release(newlink->rrl_rule); 2096 uma_zfree(rctl_rule_link_zone, newlink); 2097 } 2098 2099 goto again; 2100 } 2101 2102 /* 2103 * Assign RCTL rules to the newly created process. 2104 */ 2105 int 2106 rctl_proc_fork(struct proc *parent, struct proc *child) 2107 { 2108 struct rctl_rule *rule; 2109 struct rctl_rule_link *link; 2110 int error; 2111 2112 ASSERT_RACCT_ENABLED(); 2113 RACCT_LOCK_ASSERT(); 2114 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2115 2116 LIST_INIT(&child->p_racct->r_rule_links); 2117 2118 /* 2119 * Go through limits applicable to the parent and assign them 2120 * to the child. 
Rules with 'process' subject have to be duplicated 2121 * in order to make their rr_subject point to the new process. 2122 */ 2123 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2124 if (link->rrl_rule->rr_subject_type == 2125 RCTL_SUBJECT_TYPE_PROCESS) { 2126 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2127 if (rule == NULL) 2128 goto fail; 2129 KASSERT(rule->rr_subject.rs_proc == parent, 2130 ("rule->rr_subject.rs_proc != parent")); 2131 rule->rr_subject.rs_proc = child; 2132 error = rctl_racct_add_rule_locked(child->p_racct, 2133 rule); 2134 rctl_rule_release(rule); 2135 if (error != 0) 2136 goto fail; 2137 } else { 2138 error = rctl_racct_add_rule_locked(child->p_racct, 2139 link->rrl_rule); 2140 if (error != 0) 2141 goto fail; 2142 } 2143 } 2144 2145 return (0); 2146 2147 fail: 2148 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2149 link = LIST_FIRST(&child->p_racct->r_rule_links); 2150 LIST_REMOVE(link, rrl_next); 2151 rctl_rule_release(link->rrl_rule); 2152 uma_zfree(rctl_rule_link_zone, link); 2153 } 2154 2155 return (EAGAIN); 2156 } 2157 2158 /* 2159 * Release rules attached to the racct. 
2160 */ 2161 void 2162 rctl_racct_release(struct racct *racct) 2163 { 2164 struct rctl_rule_link *link; 2165 2166 ASSERT_RACCT_ENABLED(); 2167 RACCT_LOCK_ASSERT(); 2168 2169 while (!LIST_EMPTY(&racct->r_rule_links)) { 2170 link = LIST_FIRST(&racct->r_rule_links); 2171 LIST_REMOVE(link, rrl_next); 2172 rctl_rule_release(link->rrl_rule); 2173 uma_zfree(rctl_rule_link_zone, link); 2174 } 2175 } 2176 2177 static void 2178 rctl_init(void) 2179 { 2180 2181 if (!racct_enable) 2182 return; 2183 2184 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2185 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2186 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2187 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2188 UMA_ALIGN_PTR, 0); 2189 2190 /* 2191 * Set default values, making sure not to overwrite the ones 2192 * fetched from tunables. Most of those could be set at the 2193 * declaration, except for the rctl_throttle_max - we cannot 2194 * set it there due to hz not being compile time constant. 
2195 */ 2196 if (rctl_throttle_min < 1) 2197 rctl_throttle_min = 1; 2198 if (rctl_throttle_max < rctl_throttle_min) 2199 rctl_throttle_max = 2 * hz; 2200 if (rctl_throttle_pct < 0) 2201 rctl_throttle_pct = 100; 2202 if (rctl_throttle_pct2 < 0) 2203 rctl_throttle_pct2 = 100; 2204 } 2205 2206 #else /* !RCTL */ 2207 2208 #include <sys/types.h> 2209 #include <sys/errno.h> 2210 #include <sys/sysproto.h> 2211 2212 int 2213 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2214 { 2215 2216 return (ENOSYS); 2217 } 2218 2219 int 2220 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2221 { 2222 2223 return (ENOSYS); 2224 } 2225 2226 int 2227 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2228 { 2229 2230 return (ENOSYS); 2231 } 2232 2233 int 2234 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2235 { 2236 2237 return (ENOSYS); 2238 } 2239 2240 int 2241 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2242 { 2243 2244 return (ENOSYS); 2245 } 2246 2247 #endif /* RCTL */ 2248