1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2010 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/devctl.h> 36 #include <sys/malloc.h> 37 #include <sys/queue.h> 38 #include <sys/refcount.h> 39 #include <sys/jail.h> 40 #include <sys/kernel.h> 41 #include <sys/limits.h> 42 #include <sys/loginclass.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/racct.h> 46 #include <sys/rctl.h> 47 #include <sys/resourcevar.h> 48 #include <sys/sx.h> 49 #include <sys/sysproto.h> 50 #include <sys/systm.h> 51 #include <sys/types.h> 52 #include <sys/eventhandler.h> 53 #include <sys/lock.h> 54 #include <sys/mutex.h> 55 #include <sys/rwlock.h> 56 #include <sys/sbuf.h> 57 #include <sys/taskqueue.h> 58 #include <sys/tree.h> 59 #include <vm/uma.h> 60 61 #ifdef RCTL 62 #ifndef RACCT 63 #error "The RCTL option requires the RACCT option" 64 #endif 65 66 FEATURE(rctl, "Resource Limits"); 67 68 #define HRF_DEFAULT 0 69 #define HRF_DONT_INHERIT 1 70 #define HRF_DONT_ACCUMULATE 2 71 72 #define RCTL_MAX_INBUFSIZE 4 * 1024 73 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 74 #define RCTL_LOG_BUFSIZE 128 75 76 #define RCTL_PCPU_SHIFT (10 * 1000000) 77 78 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 79 static int rctl_log_rate_limit = 10; 80 static int rctl_devctl_rate_limit = 10; 81 82 /* 83 * Values below are initialized in rctl_init(). 84 */ 85 static int rctl_throttle_min = -1; 86 static int rctl_throttle_max = -1; 87 static int rctl_throttle_pct = -1; 88 static int rctl_throttle_pct2 = -1; 89 90 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 91 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 92 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 93 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 94 95 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 96 "Resource Limits"); 97 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 98 &rctl_maxbufsize, 0, "Maximum output buffer size"); 99 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 100 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 101 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 102 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 103 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 104 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 105 &rctl_throttle_min_sysctl, "IU", 106 "Shortest throttling duration, in hz"); 107 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 108 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 109 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 110 &rctl_throttle_max_sysctl, "IU", 111 "Longest throttling duration, in hz"); 112 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 113 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 114 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 115 &rctl_throttle_pct_sysctl, "IU", 116 "Throttling penalty for process consumption, in percent"); 117 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 118 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 119 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 120 &rctl_throttle_pct2_sysctl, "IU", 121 "Throttling penalty for container consumption, in percent"); 122 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 123 124 /* 125 * 'rctl_rule_link' connects a rule with every racct it's related to. 126 * For example, rule 'user:X:openfiles:deny=N/process' is linked 127 * with uidinfo for user X, and to each process of that user. 128 */ 129 struct rctl_rule_link { 130 LIST_ENTRY(rctl_rule_link) rrl_next; 131 struct rctl_rule *rrl_rule; 132 int rrl_exceeded; 133 }; 134 135 struct dict { 136 const char *d_name; 137 int d_value; 138 }; 139 140 static struct dict subjectnames[] = { 141 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 142 { "user", RCTL_SUBJECT_TYPE_USER }, 143 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 144 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 145 { NULL, -1 }}; 146 147 static struct dict resourcenames[] = { 148 { "cputime", RACCT_CPU }, 149 { "datasize", RACCT_DATA }, 150 { "stacksize", RACCT_STACK }, 151 { "coredumpsize", RACCT_CORE }, 152 { "memoryuse", RACCT_RSS }, 153 { "memorylocked", RACCT_MEMLOCK }, 154 { "maxproc", RACCT_NPROC }, 155 { "openfiles", RACCT_NOFILE }, 156 { "vmemoryuse", RACCT_VMEM }, 157 { "pseudoterminals", RACCT_NPTS }, 158 { "swapuse", RACCT_SWAP }, 159 { "nthr", RACCT_NTHR }, 160 { "msgqqueued", RACCT_MSGQQUEUED }, 161 { "msgqsize", RACCT_MSGQSIZE }, 162 { "nmsgq", RACCT_NMSGQ }, 163 { "nsem", RACCT_NSEM }, 164 { "nsemop", RACCT_NSEMOP }, 165 { "nshm", RACCT_NSHM }, 166 { "shmsize", RACCT_SHMSIZE }, 167 { "wallclock", RACCT_WALLCLOCK }, 168 { "pcpu", RACCT_PCTCPU }, 169 { "readbps", RACCT_READBPS }, 170 { "writebps", RACCT_WRITEBPS }, 171 { "readiops", RACCT_READIOPS }, 172 { "writeiops", RACCT_WRITEIOPS }, 173 { NULL, -1 }}; 174 175 static struct dict actionnames[] = { 176 { "sighup", RCTL_ACTION_SIGHUP }, 177 { "sigint", RCTL_ACTION_SIGINT }, 178 { "sigquit", RCTL_ACTION_SIGQUIT }, 179 { "sigill", RCTL_ACTION_SIGILL }, 180 { "sigtrap", RCTL_ACTION_SIGTRAP }, 181 { "sigabrt", RCTL_ACTION_SIGABRT }, 182 { "sigemt", RCTL_ACTION_SIGEMT }, 183 { "sigfpe", RCTL_ACTION_SIGFPE }, 184 { "sigkill", RCTL_ACTION_SIGKILL }, 185 { "sigbus", RCTL_ACTION_SIGBUS }, 186 { "sigsegv", RCTL_ACTION_SIGSEGV }, 187 { "sigsys", RCTL_ACTION_SIGSYS }, 188 { "sigpipe", RCTL_ACTION_SIGPIPE }, 189 { "sigalrm", RCTL_ACTION_SIGALRM }, 190 { "sigterm", RCTL_ACTION_SIGTERM }, 191 { "sigurg", RCTL_ACTION_SIGURG }, 192 { "sigstop", RCTL_ACTION_SIGSTOP }, 193 { "sigtstp", RCTL_ACTION_SIGTSTP }, 194 { "sigchld", RCTL_ACTION_SIGCHLD }, 195 { "sigttin", RCTL_ACTION_SIGTTIN }, 196 { "sigttou", RCTL_ACTION_SIGTTOU }, 197 { "sigio", RCTL_ACTION_SIGIO }, 198 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 199 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 200 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 201 { "sigprof", RCTL_ACTION_SIGPROF }, 202 { "sigwinch", RCTL_ACTION_SIGWINCH }, 203 { "siginfo", RCTL_ACTION_SIGINFO }, 204 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 205 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 206 { "sigthr", RCTL_ACTION_SIGTHR }, 207 { "deny", RCTL_ACTION_DENY }, 208 { "log", RCTL_ACTION_LOG }, 209 { "devctl", RCTL_ACTION_DEVCTL }, 210 { "throttle", RCTL_ACTION_THROTTLE }, 211 { NULL, -1 }}; 212 213 static void rctl_init(void); 214 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 215 216 static uma_zone_t rctl_rule_zone; 217 static uma_zone_t rctl_rule_link_zone; 218 219 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 220 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 221 222 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 223 224 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 225 { 226 int error, val = rctl_throttle_min; 227 228 error = sysctl_handle_int(oidp, &val, 0, req); 229 if (error || !req->newptr) 230 return (error); 231 if (val < 1 || val > rctl_throttle_max) 232 return (EINVAL); 233 234 RACCT_LOCK(); 235 rctl_throttle_min = val; 236 RACCT_UNLOCK(); 237 238 return (0); 239 } 240 241 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 242 { 243 int error, val = rctl_throttle_max; 244 245 error = sysctl_handle_int(oidp, &val, 0, req); 246 if (error || !req->newptr) 247 return (error); 248 if (val < rctl_throttle_min) 249 return (EINVAL); 250 251 RACCT_LOCK(); 252 rctl_throttle_max = val; 253 RACCT_UNLOCK(); 254 255 return (0); 256 } 257 258 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 259 { 260 int error, val = rctl_throttle_pct; 261 262 error = sysctl_handle_int(oidp, &val, 0, req); 263 if (error || !req->newptr) 264 return (error); 265 if (val < 0) 266 return (EINVAL); 267 268 RACCT_LOCK(); 269 rctl_throttle_pct = val; 270 RACCT_UNLOCK(); 271 272 return (0); 273 } 274 275 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 276 { 277 int error, val = rctl_throttle_pct2; 278 279 error = sysctl_handle_int(oidp, &val, 0, req); 280 if (error || !req->newptr) 281 return (error); 282 if (val < 0) 283 return (EINVAL); 284 285 RACCT_LOCK(); 286 rctl_throttle_pct2 = val; 287 RACCT_UNLOCK(); 288 289 return (0); 290 } 291 292 static const char * 293 rctl_subject_type_name(int subject) 294 { 295 int i; 296 297 for (i = 0; subjectnames[i].d_name != NULL; i++) { 298 if (subjectnames[i].d_value == subject) 299 return (subjectnames[i].d_name); 300 } 301 302 panic("rctl_subject_type_name: unknown subject type %d", subject); 303 } 304 305 static const char * 306 rctl_action_name(int action) 307 { 308 int i; 309 310 for (i = 0; actionnames[i].d_name != NULL; i++) { 311 if (actionnames[i].d_value == action) 312 return (actionnames[i].d_name); 313 } 314 315 panic("rctl_action_name: unknown action %d", action); 316 } 317 318 const char * 319 rctl_resource_name(int resource) 320 { 321 int i; 322 323 for (i = 0; resourcenames[i].d_name != NULL; i++) { 324 if (resourcenames[i].d_value == resource) 325 return (resourcenames[i].d_name); 326 } 327 328 panic("rctl_resource_name: unknown resource %d", resource); 329 } 330 331 static struct racct * 332 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 333 { 334 struct ucred *cred = p->p_ucred; 335 336 ASSERT_RACCT_ENABLED(); 337 RACCT_LOCK_ASSERT(); 338 339 switch (rule->rr_per) { 340 case RCTL_SUBJECT_TYPE_PROCESS: 341 return (p->p_racct); 342 case RCTL_SUBJECT_TYPE_USER: 343 return (cred->cr_ruidinfo->ui_racct); 344 case RCTL_SUBJECT_TYPE_LOGINCLASS: 345 return (cred->cr_loginclass->lc_racct); 346 case RCTL_SUBJECT_TYPE_JAIL: 347 return (cred->cr_prison->pr_prison_racct->prr_racct); 348 default: 349 panic("%s: unknown per %d", __func__, rule->rr_per); 350 } 351 } 352 353 /* 354 * Return the amount of resource that can be allocated by 'p' before 355 * hitting 'rule'. 356 */ 357 static int64_t 358 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 359 { 360 const struct racct *racct; 361 int64_t available; 362 363 ASSERT_RACCT_ENABLED(); 364 RACCT_LOCK_ASSERT(); 365 366 racct = rctl_proc_rule_to_racct(p, rule); 367 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 368 369 return (available); 370 } 371 372 /* 373 * Called every second for proc, uidinfo, loginclass, and jail containers. 374 * If the limit isn't exceeded, it decreases the usage amount to zero. 375 * Otherwise, it decreases it by the value of the limit. This way 376 * resource consumption exceeding the limit "carries over" to the next 377 * period. 378 */ 379 void 380 rctl_throttle_decay(struct racct *racct, int resource) 381 { 382 struct rctl_rule *rule; 383 struct rctl_rule_link *link; 384 int64_t minavailable; 385 386 ASSERT_RACCT_ENABLED(); 387 RACCT_LOCK_ASSERT(); 388 389 minavailable = INT64_MAX; 390 391 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 392 rule = link->rrl_rule; 393 394 if (rule->rr_resource != resource) 395 continue; 396 if (rule->rr_action != RCTL_ACTION_THROTTLE) 397 continue; 398 399 if (rule->rr_amount < minavailable) 400 minavailable = rule->rr_amount; 401 } 402 403 if (racct->r_resources[resource] < minavailable) { 404 racct->r_resources[resource] = 0; 405 } else { 406 /* 407 * Cap utilization counter at ten times the limit. Otherwise, 408 * if we changed the rule lowering the allowed amount, it could 409 * take unreasonably long time for the accumulated resource 410 * usage to drop. 411 */ 412 if (racct->r_resources[resource] > minavailable * 10) 413 racct->r_resources[resource] = minavailable * 10; 414 415 racct->r_resources[resource] -= minavailable; 416 } 417 } 418 419 /* 420 * Special version of rctl_get_available() for the %CPU resource. 421 * We slightly cheat here and return less than we normally would. 422 */ 423 int64_t 424 rctl_pcpu_available(const struct proc *p) { 425 struct rctl_rule *rule; 426 struct rctl_rule_link *link; 427 int64_t available, minavailable, limit; 428 429 ASSERT_RACCT_ENABLED(); 430 RACCT_LOCK_ASSERT(); 431 432 minavailable = INT64_MAX; 433 limit = 0; 434 435 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 436 rule = link->rrl_rule; 437 if (rule->rr_resource != RACCT_PCTCPU) 438 continue; 439 if (rule->rr_action != RCTL_ACTION_DENY) 440 continue; 441 available = rctl_available_resource(p, rule); 442 if (available < minavailable) { 443 minavailable = available; 444 limit = rule->rr_amount; 445 } 446 } 447 448 /* 449 * Return slightly less than actual value of the available 450 * %cpu resource. This makes %cpu throttling more aggressive 451 * and lets us act sooner than the limits are already exceeded. 452 */ 453 if (limit != 0) { 454 if (limit > 2 * RCTL_PCPU_SHIFT) 455 minavailable -= RCTL_PCPU_SHIFT; 456 else 457 minavailable -= (limit / 2); 458 } 459 460 return (minavailable); 461 } 462 463 static uint64_t 464 xadd(uint64_t a, uint64_t b) 465 { 466 uint64_t c; 467 468 c = a + b; 469 470 /* 471 * Detect overflow. 472 */ 473 if (c < a || c < b) 474 return (UINT64_MAX); 475 476 return (c); 477 } 478 479 static uint64_t 480 xmul(uint64_t a, uint64_t b) 481 { 482 483 if (b != 0 && a > UINT64_MAX / b) 484 return (UINT64_MAX); 485 486 return (a * b); 487 } 488 489 /* 490 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 491 * to what it keeps allocated now. Returns non-zero if the allocation should 492 * be denied, 0 otherwise. 493 */ 494 int 495 rctl_enforce(struct proc *p, int resource, uint64_t amount) 496 { 497 static struct timeval log_lasttime, devctl_lasttime; 498 static int log_curtime = 0, devctl_curtime = 0; 499 struct rctl_rule *rule; 500 struct rctl_rule_link *link; 501 struct sbuf sb; 502 char *buf; 503 int64_t available; 504 uint64_t sleep_ms, sleep_ratio; 505 int should_deny = 0; 506 507 ASSERT_RACCT_ENABLED(); 508 RACCT_LOCK_ASSERT(); 509 510 /* 511 * There may be more than one matching rule; go through all of them. 512 * Denial should be done last, after logging and sending signals. 513 */ 514 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 515 rule = link->rrl_rule; 516 if (rule->rr_resource != resource) 517 continue; 518 519 available = rctl_available_resource(p, rule); 520 if (available >= (int64_t)amount) { 521 link->rrl_exceeded = 0; 522 continue; 523 } 524 525 switch (rule->rr_action) { 526 case RCTL_ACTION_DENY: 527 should_deny = 1; 528 continue; 529 case RCTL_ACTION_LOG: 530 /* 531 * If rrl_exceeded != 0, it means we've already 532 * logged a warning for this process. 533 */ 534 if (link->rrl_exceeded != 0) 535 continue; 536 537 /* 538 * If the process state is not fully initialized yet, 539 * we can't access most of the required fields, e.g. 540 * p->p_comm. This happens when called from fork1(). 541 * Ignore this rule for now; it will be processed just 542 * after fork, when called from racct_proc_fork_done(). 543 */ 544 if (p->p_state != PRS_NORMAL) 545 continue; 546 547 if (!ppsratecheck(&log_lasttime, &log_curtime, 548 rctl_log_rate_limit)) 549 continue; 550 551 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 552 if (buf == NULL) { 553 printf("rctl_enforce: out of memory\n"); 554 continue; 555 } 556 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 557 rctl_rule_to_sbuf(&sb, rule); 558 sbuf_finish(&sb); 559 printf("rctl: rule \"%s\" matched by pid %d " 560 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 561 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 562 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 563 sbuf_delete(&sb); 564 free(buf, M_RCTL); 565 link->rrl_exceeded = 1; 566 continue; 567 case RCTL_ACTION_DEVCTL: 568 if (link->rrl_exceeded != 0) 569 continue; 570 571 if (p->p_state != PRS_NORMAL) 572 continue; 573 574 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 575 rctl_devctl_rate_limit)) 576 continue; 577 578 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 579 if (buf == NULL) { 580 printf("rctl_enforce: out of memory\n"); 581 continue; 582 } 583 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 584 sbuf_printf(&sb, "rule="); 585 rctl_rule_to_sbuf(&sb, rule); 586 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 587 p->p_pid, p->p_ucred->cr_ruid, 588 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 589 sbuf_finish(&sb); 590 devctl_notify("RCTL", "rule", "matched", 591 sbuf_data(&sb)); 592 sbuf_delete(&sb); 593 free(buf, M_RCTL); 594 link->rrl_exceeded = 1; 595 continue; 596 case RCTL_ACTION_THROTTLE: 597 if (p->p_state != PRS_NORMAL) 598 continue; 599 600 if (rule->rr_amount == 0) { 601 racct_proc_throttle(p, rctl_throttle_max); 602 continue; 603 } 604 605 /* 606 * Make the process sleep for a fraction of second 607 * proportional to the ratio of process' resource 608 * utilization compared to the limit. The point is 609 * to penalize resource hogs: processes that consume 610 * more of the available resources sleep for longer. 611 * 612 * We're trying to defer division until the very end, 613 * to minimize the rounding effects. The following 614 * calculation could have been written in a clearer 615 * way like this: 616 * 617 * sleep_ms = hz * p->p_racct->r_resources[resource] / 618 * rule->rr_amount; 619 * sleep_ms *= rctl_throttle_pct / 100; 620 * if (sleep_ms < rctl_throttle_min) 621 * sleep_ms = rctl_throttle_min; 622 * 623 */ 624 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 625 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 626 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 627 sleep_ms = rctl_throttle_min * rule->rr_amount; 628 629 /* 630 * Multiply that by the ratio of the resource 631 * consumption for the container compared to the limit, 632 * squared. In other words, a process in a container 633 * that is two times over the limit will be throttled 634 * four times as much for hitting the same rule. The 635 * point is to penalize processes more if the container 636 * itself (eg certain UID or jail) is above the limit. 637 */ 638 if (available < 0) 639 sleep_ratio = -available / rule->rr_amount; 640 else 641 sleep_ratio = 0; 642 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 643 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 644 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 645 646 /* 647 * Finally the division. 648 */ 649 sleep_ms /= rule->rr_amount; 650 651 if (sleep_ms > rctl_throttle_max) 652 sleep_ms = rctl_throttle_max; 653 #if 0 654 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n", 655 __func__, p->p_pid, p->p_comm, 656 p->p_racct->r_resources[resource], 657 rule->rr_amount, (uintmax_t)sleep_ms, 658 (uintmax_t)sleep_ratio, (intmax_t)available); 659 #endif 660 661 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 662 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 663 racct_proc_throttle(p, sleep_ms); 664 continue; 665 default: 666 if (link->rrl_exceeded != 0) 667 continue; 668 669 if (p->p_state != PRS_NORMAL) 670 continue; 671 672 KASSERT(rule->rr_action > 0 && 673 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 674 ("rctl_enforce: unknown action %d", 675 rule->rr_action)); 676 677 /* 678 * We're using the fact that RCTL_ACTION_SIG* values 679 * are equal to their counterparts from sys/signal.h. 680 */ 681 kern_psignal(p, rule->rr_action); 682 link->rrl_exceeded = 1; 683 continue; 684 } 685 } 686 687 if (should_deny) { 688 /* 689 * Return fake error code; the caller should change it 690 * into one proper for the situation - EFSIZ, ENOMEM etc. 691 */ 692 return (EDOOFUS); 693 } 694 695 return (0); 696 } 697 698 uint64_t 699 rctl_get_limit(struct proc *p, int resource) 700 { 701 struct rctl_rule *rule; 702 struct rctl_rule_link *link; 703 uint64_t amount = UINT64_MAX; 704 705 ASSERT_RACCT_ENABLED(); 706 RACCT_LOCK_ASSERT(); 707 708 /* 709 * There may be more than one matching rule; go through all of them. 710 * Denial should be done last, after logging and sending signals. 711 */ 712 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 713 rule = link->rrl_rule; 714 if (rule->rr_resource != resource) 715 continue; 716 if (rule->rr_action != RCTL_ACTION_DENY) 717 continue; 718 if (rule->rr_amount < amount) 719 amount = rule->rr_amount; 720 } 721 722 return (amount); 723 } 724 725 uint64_t 726 rctl_get_available(struct proc *p, int resource) 727 { 728 struct rctl_rule *rule; 729 struct rctl_rule_link *link; 730 int64_t available, minavailable, allocated; 731 732 minavailable = INT64_MAX; 733 734 ASSERT_RACCT_ENABLED(); 735 RACCT_LOCK_ASSERT(); 736 737 /* 738 * There may be more than one matching rule; go through all of them. 739 * Denial should be done last, after logging and sending signals. 740 */ 741 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 742 rule = link->rrl_rule; 743 if (rule->rr_resource != resource) 744 continue; 745 if (rule->rr_action != RCTL_ACTION_DENY) 746 continue; 747 available = rctl_available_resource(p, rule); 748 if (available < minavailable) 749 minavailable = available; 750 } 751 752 /* 753 * XXX: Think about this _hard_. 754 */ 755 allocated = p->p_racct->r_resources[resource]; 756 if (minavailable < INT64_MAX - allocated) 757 minavailable += allocated; 758 if (minavailable < 0) 759 minavailable = 0; 760 761 return (minavailable); 762 } 763 764 static int 765 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 766 { 767 768 ASSERT_RACCT_ENABLED(); 769 770 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 771 if (rule->rr_subject_type != filter->rr_subject_type) 772 return (0); 773 774 switch (filter->rr_subject_type) { 775 case RCTL_SUBJECT_TYPE_PROCESS: 776 if (filter->rr_subject.rs_proc != NULL && 777 rule->rr_subject.rs_proc != 778 filter->rr_subject.rs_proc) 779 return (0); 780 break; 781 case RCTL_SUBJECT_TYPE_USER: 782 if (filter->rr_subject.rs_uip != NULL && 783 rule->rr_subject.rs_uip != 784 filter->rr_subject.rs_uip) 785 return (0); 786 break; 787 case RCTL_SUBJECT_TYPE_LOGINCLASS: 788 if (filter->rr_subject.rs_loginclass != NULL && 789 rule->rr_subject.rs_loginclass != 790 filter->rr_subject.rs_loginclass) 791 return (0); 792 break; 793 case RCTL_SUBJECT_TYPE_JAIL: 794 if (filter->rr_subject.rs_prison_racct != NULL && 795 rule->rr_subject.rs_prison_racct != 796 filter->rr_subject.rs_prison_racct) 797 return (0); 798 break; 799 default: 800 panic("rctl_rule_matches: unknown subject type %d", 801 filter->rr_subject_type); 802 } 803 } 804 805 if (filter->rr_resource != RACCT_UNDEFINED) { 806 if (rule->rr_resource != filter->rr_resource) 807 return (0); 808 } 809 810 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 811 if (rule->rr_action != filter->rr_action) 812 return (0); 813 } 814 815 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 816 if (rule->rr_amount != filter->rr_amount) 817 return (0); 818 } 819 820 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 821 if (rule->rr_per != filter->rr_per) 822 return (0); 823 } 824 825 return (1); 826 } 827 828 static int 829 str2value(const char *str, int *value, struct dict *table) 830 { 831 int i; 832 833 if (value == NULL) 834 return (EINVAL); 835 836 for (i = 0; table[i].d_name != NULL; i++) { 837 if (strcasecmp(table[i].d_name, str) == 0) { 838 *value = table[i].d_value; 839 return (0); 840 } 841 } 842 843 return (EINVAL); 844 } 845 846 static int 847 str2id(const char *str, id_t *value) 848 { 849 char *end; 850 851 if (str == NULL) 852 return (EINVAL); 853 854 *value = strtoul(str, &end, 10); 855 if ((size_t)(end - str) != strlen(str)) 856 return (EINVAL); 857 858 return (0); 859 } 860 861 static int 862 str2int64(const char *str, int64_t *value) 863 { 864 char *end; 865 866 if (str == NULL) 867 return (EINVAL); 868 869 *value = strtoul(str, &end, 10); 870 if ((size_t)(end - str) != strlen(str)) 871 return (EINVAL); 872 873 if (*value < 0) 874 return (ERANGE); 875 876 return (0); 877 } 878 879 /* 880 * Connect the rule to the racct, increasing refcount for the rule. 881 */ 882 static void 883 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 884 { 885 struct rctl_rule_link *link; 886 887 ASSERT_RACCT_ENABLED(); 888 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 889 890 rctl_rule_acquire(rule); 891 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 892 link->rrl_rule = rule; 893 link->rrl_exceeded = 0; 894 895 RACCT_LOCK(); 896 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 897 RACCT_UNLOCK(); 898 } 899 900 static int 901 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 902 { 903 struct rctl_rule_link *link; 904 905 ASSERT_RACCT_ENABLED(); 906 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 907 RACCT_LOCK_ASSERT(); 908 909 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 910 if (link == NULL) 911 return (ENOMEM); 912 rctl_rule_acquire(rule); 913 link->rrl_rule = rule; 914 link->rrl_exceeded = 0; 915 916 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 917 918 return (0); 919 } 920 921 /* 922 * Remove limits for a rules matching the filter and release 923 * the refcounts for the rules, possibly freeing them. Returns 924 * the number of limit structures removed. 925 */ 926 static int 927 rctl_racct_remove_rules(struct racct *racct, 928 const struct rctl_rule *filter) 929 { 930 struct rctl_rule_link *link, *linktmp; 931 int removed = 0; 932 933 ASSERT_RACCT_ENABLED(); 934 RACCT_LOCK_ASSERT(); 935 936 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 937 if (!rctl_rule_matches(link->rrl_rule, filter)) 938 continue; 939 940 LIST_REMOVE(link, rrl_next); 941 rctl_rule_release(link->rrl_rule); 942 uma_zfree(rctl_rule_link_zone, link); 943 removed++; 944 } 945 return (removed); 946 } 947 948 static void 949 rctl_rule_acquire_subject(struct rctl_rule *rule) 950 { 951 952 ASSERT_RACCT_ENABLED(); 953 954 switch (rule->rr_subject_type) { 955 case RCTL_SUBJECT_TYPE_UNDEFINED: 956 case RCTL_SUBJECT_TYPE_PROCESS: 957 break; 958 case RCTL_SUBJECT_TYPE_JAIL: 959 if (rule->rr_subject.rs_prison_racct != NULL) 960 prison_racct_hold(rule->rr_subject.rs_prison_racct); 961 break; 962 case RCTL_SUBJECT_TYPE_USER: 963 if (rule->rr_subject.rs_uip != NULL) 964 uihold(rule->rr_subject.rs_uip); 965 break; 966 case RCTL_SUBJECT_TYPE_LOGINCLASS: 967 if (rule->rr_subject.rs_loginclass != NULL) 968 loginclass_hold(rule->rr_subject.rs_loginclass); 969 break; 970 default: 971 panic("rctl_rule_acquire_subject: unknown subject type %d", 972 rule->rr_subject_type); 973 } 974 } 975 976 static void 977 rctl_rule_release_subject(struct rctl_rule *rule) 978 { 979 980 ASSERT_RACCT_ENABLED(); 981 982 switch (rule->rr_subject_type) { 983 case RCTL_SUBJECT_TYPE_UNDEFINED: 984 case RCTL_SUBJECT_TYPE_PROCESS: 985 break; 986 case RCTL_SUBJECT_TYPE_JAIL: 987 if (rule->rr_subject.rs_prison_racct != NULL) 988 prison_racct_free(rule->rr_subject.rs_prison_racct); 989 break; 990 case RCTL_SUBJECT_TYPE_USER: 991 if (rule->rr_subject.rs_uip != NULL) 992 uifree(rule->rr_subject.rs_uip); 993 break; 994 case RCTL_SUBJECT_TYPE_LOGINCLASS: 995 if (rule->rr_subject.rs_loginclass != NULL) 996 loginclass_free(rule->rr_subject.rs_loginclass); 997 break; 998 default: 999 panic("rctl_rule_release_subject: unknown subject type %d", 1000 rule->rr_subject_type); 1001 } 1002 } 1003 1004 struct rctl_rule * 1005 rctl_rule_alloc(int flags) 1006 { 1007 struct rctl_rule *rule; 1008 1009 ASSERT_RACCT_ENABLED(); 1010 1011 rule = uma_zalloc(rctl_rule_zone, flags); 1012 if (rule == NULL) 1013 return (NULL); 1014 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1015 rule->rr_subject.rs_proc = NULL; 1016 rule->rr_subject.rs_uip = NULL; 1017 rule->rr_subject.rs_loginclass = NULL; 1018 rule->rr_subject.rs_prison_racct = NULL; 1019 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1020 rule->rr_resource = RACCT_UNDEFINED; 1021 rule->rr_action = RCTL_ACTION_UNDEFINED; 1022 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1023 refcount_init(&rule->rr_refcount, 1); 1024 1025 return (rule); 1026 } 1027 1028 struct rctl_rule * 1029 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1030 { 1031 struct rctl_rule *copy; 1032 1033 ASSERT_RACCT_ENABLED(); 1034 1035 copy = uma_zalloc(rctl_rule_zone, flags); 1036 if (copy == NULL) 1037 return (NULL); 1038 copy->rr_subject_type = rule->rr_subject_type; 1039 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1040 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1041 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1042 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1043 copy->rr_per = rule->rr_per; 1044 copy->rr_resource = rule->rr_resource; 1045 copy->rr_action = rule->rr_action; 1046 copy->rr_amount = rule->rr_amount; 1047 refcount_init(©->rr_refcount, 1); 1048 rctl_rule_acquire_subject(copy); 1049 1050 return (copy); 1051 } 1052 1053 void 1054 rctl_rule_acquire(struct rctl_rule *rule) 1055 { 1056 1057 ASSERT_RACCT_ENABLED(); 1058 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1059 1060 refcount_acquire(&rule->rr_refcount); 1061 } 1062 1063 static void 1064 rctl_rule_free(void *context, int pending) 1065 { 1066 struct rctl_rule *rule; 1067 1068 rule = (struct rctl_rule *)context; 1069 1070 ASSERT_RACCT_ENABLED(); 1071 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1072 1073 /* 1074 * We don't need locking here; rule is guaranteed to be inaccessible. 1075 */ 1076 1077 rctl_rule_release_subject(rule); 1078 uma_zfree(rctl_rule_zone, rule); 1079 } 1080 1081 void 1082 rctl_rule_release(struct rctl_rule *rule) 1083 { 1084 1085 ASSERT_RACCT_ENABLED(); 1086 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1087 1088 if (refcount_release(&rule->rr_refcount)) { 1089 /* 1090 * rctl_rule_release() is often called when iterating 1091 * over all the uidinfo structures in the system, 1092 * holding uihashtbl_lock. Since rctl_rule_free() 1093 * might end up calling uifree(), this would lead 1094 * to lock recursion. Use taskqueue to avoid this. 1095 */ 1096 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1097 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1098 } 1099 } 1100 1101 static int 1102 rctl_rule_fully_specified(const struct rctl_rule *rule) 1103 { 1104 1105 ASSERT_RACCT_ENABLED(); 1106 1107 switch (rule->rr_subject_type) { 1108 case RCTL_SUBJECT_TYPE_UNDEFINED: 1109 return (0); 1110 case RCTL_SUBJECT_TYPE_PROCESS: 1111 if (rule->rr_subject.rs_proc == NULL) 1112 return (0); 1113 break; 1114 case RCTL_SUBJECT_TYPE_USER: 1115 if (rule->rr_subject.rs_uip == NULL) 1116 return (0); 1117 break; 1118 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1119 if (rule->rr_subject.rs_loginclass == NULL) 1120 return (0); 1121 break; 1122 case RCTL_SUBJECT_TYPE_JAIL: 1123 if (rule->rr_subject.rs_prison_racct == NULL) 1124 return (0); 1125 break; 1126 default: 1127 panic("rctl_rule_fully_specified: unknown subject type %d", 1128 rule->rr_subject_type); 1129 } 1130 if (rule->rr_resource == RACCT_UNDEFINED) 1131 return (0); 1132 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1133 return (0); 1134 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1135 return (0); 1136 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1137 return (0); 1138 1139 return (1); 1140 } 1141 1142 static int 1143 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1144 { 1145 struct rctl_rule *rule; 1146 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1147 *amountstr, *perstr; 1148 id_t id; 1149 int error = 0; 1150 1151 ASSERT_RACCT_ENABLED(); 1152 1153 rule = rctl_rule_alloc(M_WAITOK); 1154 1155 subjectstr = strsep(&rulestr, ":"); 1156 subject_idstr = strsep(&rulestr, ":"); 1157 resourcestr = strsep(&rulestr, ":"); 1158 actionstr = strsep(&rulestr, "=/"); 1159 amountstr = strsep(&rulestr, "/"); 1160 perstr = rulestr; 1161 1162 if (subjectstr == NULL || subjectstr[0] == '\0') 1163 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1164 else { 1165 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1166 if (error != 0) 1167 goto out; 1168 } 1169 1170 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1171 rule->rr_subject.rs_proc = NULL; 1172 rule->rr_subject.rs_uip = NULL; 1173 rule->rr_subject.rs_loginclass = NULL; 1174 rule->rr_subject.rs_prison_racct = NULL; 1175 } else { 1176 switch (rule->rr_subject_type) { 1177 case RCTL_SUBJECT_TYPE_UNDEFINED: 1178 error = EINVAL; 1179 goto out; 1180 case RCTL_SUBJECT_TYPE_PROCESS: 1181 error = str2id(subject_idstr, &id); 1182 if (error != 0) 1183 goto out; 1184 sx_assert(&allproc_lock, SA_LOCKED); 1185 rule->rr_subject.rs_proc = pfind(id); 1186 if (rule->rr_subject.rs_proc == NULL) { 1187 error = ESRCH; 1188 goto out; 1189 } 1190 PROC_UNLOCK(rule->rr_subject.rs_proc); 1191 break; 1192 case RCTL_SUBJECT_TYPE_USER: 1193 error = str2id(subject_idstr, &id); 1194 if (error != 0) 1195 goto out; 1196 rule->rr_subject.rs_uip = uifind(id); 1197 break; 1198 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1199 rule->rr_subject.rs_loginclass = 1200 loginclass_find(subject_idstr); 1201 if (rule->rr_subject.rs_loginclass == NULL) { 1202 error = ENAMETOOLONG; 1203 goto out; 1204 } 1205 break; 1206 case RCTL_SUBJECT_TYPE_JAIL: 1207 rule->rr_subject.rs_prison_racct = 1208 prison_racct_find(subject_idstr); 1209 if (rule->rr_subject.rs_prison_racct == NULL) { 1210 error = ENAMETOOLONG; 1211 goto out; 1212 } 1213 break; 1214 default: 1215 panic("rctl_string_to_rule: unknown subject type %d", 1216 rule->rr_subject_type); 1217 } 1218 } 1219 1220 if (resourcestr == NULL || resourcestr[0] == '\0') 1221 rule->rr_resource = RACCT_UNDEFINED; 1222 else { 1223 error = str2value(resourcestr, &rule->rr_resource, 1224 resourcenames); 1225 if (error != 0) 1226 goto out; 1227 } 1228 1229 if (actionstr == NULL || actionstr[0] == '\0') 1230 rule->rr_action = RCTL_ACTION_UNDEFINED; 1231 else { 1232 error = str2value(actionstr, &rule->rr_action, actionnames); 1233 if (error != 0) 1234 goto out; 1235 } 1236 1237 if (amountstr == NULL || amountstr[0] == '\0') 1238 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1239 else { 1240 error = str2int64(amountstr, &rule->rr_amount); 1241 if (error != 0) 1242 goto out; 1243 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1244 if (rule->rr_amount > INT64_MAX / 1000000) { 1245 error = ERANGE; 1246 goto out; 1247 } 1248 rule->rr_amount *= 1000000; 1249 } 1250 } 1251 1252 if (perstr == NULL || perstr[0] == '\0') 1253 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1254 else { 1255 error = str2value(perstr, &rule->rr_per, subjectnames); 1256 if (error != 0) 1257 goto out; 1258 } 1259 1260 out: 1261 if (error == 0) 1262 *rulep = rule; 1263 else 1264 rctl_rule_release(rule); 1265 1266 return (error); 1267 } 1268 1269 /* 1270 * Link a rule with all the subjects it applies to. 1271 */ 1272 int 1273 rctl_rule_add(struct rctl_rule *rule) 1274 { 1275 struct proc *p; 1276 struct ucred *cred; 1277 struct uidinfo *uip; 1278 struct prison *pr; 1279 struct prison_racct *prr; 1280 struct loginclass *lc; 1281 struct rctl_rule *rule2; 1282 int match; 1283 1284 ASSERT_RACCT_ENABLED(); 1285 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1286 1287 /* 1288 * Some rules just don't make sense, like "deny" rule for an undeniable 1289 * resource. The exception are the RSS and %CPU resources - they are 1290 * not deniable in the racct sense, but the limit is enforced in 1291 * a different way. 1292 */ 1293 if (rule->rr_action == RCTL_ACTION_DENY && 1294 !RACCT_IS_DENIABLE(rule->rr_resource) && 1295 rule->rr_resource != RACCT_RSS && 1296 rule->rr_resource != RACCT_PCTCPU) { 1297 return (EOPNOTSUPP); 1298 } 1299 1300 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1301 !RACCT_IS_DECAYING(rule->rr_resource)) { 1302 return (EOPNOTSUPP); 1303 } 1304 1305 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1306 rule->rr_resource == RACCT_PCTCPU) { 1307 return (EOPNOTSUPP); 1308 } 1309 1310 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1311 RACCT_IS_SLOPPY(rule->rr_resource)) { 1312 return (EOPNOTSUPP); 1313 } 1314 1315 /* 1316 * Make sure there are no duplicated rules. Also, for the "deny" 1317 * rules, remove ones differing only by "amount". 1318 */ 1319 if (rule->rr_action == RCTL_ACTION_DENY) { 1320 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1321 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1322 rctl_rule_remove(rule2); 1323 rctl_rule_release(rule2); 1324 } else 1325 rctl_rule_remove(rule); 1326 1327 switch (rule->rr_subject_type) { 1328 case RCTL_SUBJECT_TYPE_PROCESS: 1329 p = rule->rr_subject.rs_proc; 1330 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1331 1332 rctl_racct_add_rule(p->p_racct, rule); 1333 /* 1334 * In case of per-process rule, we don't have anything more 1335 * to do. 1336 */ 1337 return (0); 1338 1339 case RCTL_SUBJECT_TYPE_USER: 1340 uip = rule->rr_subject.rs_uip; 1341 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1342 rctl_racct_add_rule(uip->ui_racct, rule); 1343 break; 1344 1345 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1346 lc = rule->rr_subject.rs_loginclass; 1347 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1348 rctl_racct_add_rule(lc->lc_racct, rule); 1349 break; 1350 1351 case RCTL_SUBJECT_TYPE_JAIL: 1352 prr = rule->rr_subject.rs_prison_racct; 1353 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1354 rctl_racct_add_rule(prr->prr_racct, rule); 1355 break; 1356 1357 default: 1358 panic("rctl_rule_add: unknown subject type %d", 1359 rule->rr_subject_type); 1360 } 1361 1362 /* 1363 * Now go through all the processes and add the new rule to the ones 1364 * it applies to. 1365 */ 1366 sx_assert(&allproc_lock, SA_LOCKED); 1367 FOREACH_PROC_IN_SYSTEM(p) { 1368 cred = p->p_ucred; 1369 switch (rule->rr_subject_type) { 1370 case RCTL_SUBJECT_TYPE_USER: 1371 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1372 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1373 break; 1374 continue; 1375 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1376 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1377 break; 1378 continue; 1379 case RCTL_SUBJECT_TYPE_JAIL: 1380 match = 0; 1381 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1382 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1383 match = 1; 1384 break; 1385 } 1386 } 1387 if (match) 1388 break; 1389 continue; 1390 default: 1391 panic("rctl_rule_add: unknown subject type %d", 1392 rule->rr_subject_type); 1393 } 1394 1395 rctl_racct_add_rule(p->p_racct, rule); 1396 } 1397 1398 return (0); 1399 } 1400 1401 static void 1402 rctl_rule_pre_callback(void) 1403 { 1404 1405 RACCT_LOCK(); 1406 } 1407 1408 static void 1409 rctl_rule_post_callback(void) 1410 { 1411 1412 RACCT_UNLOCK(); 1413 } 1414 1415 static void 1416 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) 1417 { 1418 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1419 int found = 0; 1420 1421 ASSERT_RACCT_ENABLED(); 1422 RACCT_LOCK_ASSERT(); 1423 1424 found += rctl_racct_remove_rules(racct, filter); 1425 1426 *((int *)arg3) += found; 1427 } 1428 1429 /* 1430 * Remove all rules that match the filter. 1431 */ 1432 int 1433 rctl_rule_remove(struct rctl_rule *filter) 1434 { 1435 struct proc *p; 1436 int found = 0; 1437 1438 ASSERT_RACCT_ENABLED(); 1439 1440 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1441 filter->rr_subject.rs_proc != NULL) { 1442 p = filter->rr_subject.rs_proc; 1443 RACCT_LOCK(); 1444 found = rctl_racct_remove_rules(p->p_racct, filter); 1445 RACCT_UNLOCK(); 1446 if (found) 1447 return (0); 1448 return (ESRCH); 1449 } 1450 1451 loginclass_racct_foreach(rctl_rule_remove_callback, 1452 rctl_rule_pre_callback, rctl_rule_post_callback, 1453 filter, (void *)&found); 1454 ui_racct_foreach(rctl_rule_remove_callback, 1455 rctl_rule_pre_callback, rctl_rule_post_callback, 1456 filter, (void *)&found); 1457 prison_racct_foreach(rctl_rule_remove_callback, 1458 rctl_rule_pre_callback, rctl_rule_post_callback, 1459 filter, (void *)&found); 1460 1461 sx_assert(&allproc_lock, SA_LOCKED); 1462 RACCT_LOCK(); 1463 FOREACH_PROC_IN_SYSTEM(p) { 1464 found += rctl_racct_remove_rules(p->p_racct, filter); 1465 } 1466 RACCT_UNLOCK(); 1467 1468 if (found) 1469 return (0); 1470 return (ESRCH); 1471 } 1472 1473 /* 1474 * Appends a rule to the sbuf. 1475 */ 1476 static void 1477 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1478 { 1479 int64_t amount; 1480 1481 ASSERT_RACCT_ENABLED(); 1482 1483 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1484 1485 switch (rule->rr_subject_type) { 1486 case RCTL_SUBJECT_TYPE_PROCESS: 1487 if (rule->rr_subject.rs_proc == NULL) 1488 sbuf_printf(sb, ":"); 1489 else 1490 sbuf_printf(sb, "%d:", 1491 rule->rr_subject.rs_proc->p_pid); 1492 break; 1493 case RCTL_SUBJECT_TYPE_USER: 1494 if (rule->rr_subject.rs_uip == NULL) 1495 sbuf_printf(sb, ":"); 1496 else 1497 sbuf_printf(sb, "%d:", 1498 rule->rr_subject.rs_uip->ui_uid); 1499 break; 1500 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1501 if (rule->rr_subject.rs_loginclass == NULL) 1502 sbuf_printf(sb, ":"); 1503 else 1504 sbuf_printf(sb, "%s:", 1505 rule->rr_subject.rs_loginclass->lc_name); 1506 break; 1507 case RCTL_SUBJECT_TYPE_JAIL: 1508 if (rule->rr_subject.rs_prison_racct == NULL) 1509 sbuf_printf(sb, ":"); 1510 else 1511 sbuf_printf(sb, "%s:", 1512 rule->rr_subject.rs_prison_racct->prr_name); 1513 break; 1514 default: 1515 panic("rctl_rule_to_sbuf: unknown subject type %d", 1516 rule->rr_subject_type); 1517 } 1518 1519 amount = rule->rr_amount; 1520 if (amount != RCTL_AMOUNT_UNDEFINED && 1521 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1522 amount /= 1000000; 1523 1524 sbuf_printf(sb, "%s:%s=%jd", 1525 rctl_resource_name(rule->rr_resource), 1526 rctl_action_name(rule->rr_action), 1527 amount); 1528 1529 if (rule->rr_per != rule->rr_subject_type) 1530 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1531 } 1532 1533 /* 1534 * Routine used by RCTL syscalls to read in input string. 1535 */ 1536 static int 1537 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1538 { 1539 char *str; 1540 int error; 1541 1542 ASSERT_RACCT_ENABLED(); 1543 1544 if (inbuflen <= 0) 1545 return (EINVAL); 1546 if (inbuflen > RCTL_MAX_INBUFSIZE) 1547 return (E2BIG); 1548 1549 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1550 error = copyinstr(inbufp, str, inbuflen, NULL); 1551 if (error != 0) { 1552 free(str, M_RCTL); 1553 return (error); 1554 } 1555 1556 *inputstr = str; 1557 1558 return (0); 1559 } 1560 1561 /* 1562 * Routine used by RCTL syscalls to write out output string. 1563 */ 1564 static int 1565 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1566 { 1567 int error; 1568 1569 ASSERT_RACCT_ENABLED(); 1570 1571 if (outputsbuf == NULL) 1572 return (0); 1573 1574 sbuf_finish(outputsbuf); 1575 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1576 sbuf_delete(outputsbuf); 1577 return (ERANGE); 1578 } 1579 error = copyout(sbuf_data(outputsbuf), outbufp, 1580 sbuf_len(outputsbuf) + 1); 1581 sbuf_delete(outputsbuf); 1582 return (error); 1583 } 1584 1585 static struct sbuf * 1586 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1587 { 1588 struct sbuf *sb; 1589 int64_t amount; 1590 int i; 1591 1592 ASSERT_RACCT_ENABLED(); 1593 1594 sb = sbuf_new_auto(); 1595 for (i = 0; i <= RACCT_MAX; i++) { 1596 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1597 continue; 1598 RACCT_LOCK(); 1599 amount = racct->r_resources[i]; 1600 RACCT_UNLOCK(); 1601 if (RACCT_IS_IN_MILLIONS(i)) 1602 amount /= 1000000; 1603 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1604 } 1605 sbuf_setpos(sb, sbuf_len(sb) - 1); 1606 return (sb); 1607 } 1608 1609 int 1610 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1611 { 1612 struct rctl_rule *filter; 1613 struct sbuf *outputsbuf = NULL; 1614 struct proc *p; 1615 struct uidinfo *uip; 1616 struct loginclass *lc; 1617 struct prison_racct *prr; 1618 char *inputstr; 1619 int error; 1620 1621 if (!racct_enable) 1622 return (ENOSYS); 1623 1624 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1625 if (error != 0) 1626 return (error); 1627 1628 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1629 if (error != 0) 1630 return (error); 1631 1632 sx_slock(&allproc_lock); 1633 error = rctl_string_to_rule(inputstr, &filter); 1634 free(inputstr, M_RCTL); 1635 if (error != 0) { 1636 sx_sunlock(&allproc_lock); 1637 return (error); 1638 } 1639 1640 switch (filter->rr_subject_type) { 1641 case RCTL_SUBJECT_TYPE_PROCESS: 1642 p = filter->rr_subject.rs_proc; 1643 if (p == NULL) { 1644 error = EINVAL; 1645 goto out; 1646 } 1647 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1648 break; 1649 case RCTL_SUBJECT_TYPE_USER: 1650 uip = filter->rr_subject.rs_uip; 1651 if (uip == NULL) { 1652 error = EINVAL; 1653 goto out; 1654 } 1655 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1656 break; 1657 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1658 lc = filter->rr_subject.rs_loginclass; 1659 if (lc == NULL) { 1660 error = EINVAL; 1661 goto out; 1662 } 1663 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1664 break; 1665 case RCTL_SUBJECT_TYPE_JAIL: 1666 prr = filter->rr_subject.rs_prison_racct; 1667 if (prr == NULL) { 1668 error = EINVAL; 1669 goto out; 1670 } 1671 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1672 break; 1673 default: 1674 error = EINVAL; 1675 } 1676 out: 1677 rctl_rule_release(filter); 1678 sx_sunlock(&allproc_lock); 1679 if (error != 0) 1680 return (error); 1681 1682 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1683 1684 return (error); 1685 } 1686 1687 static void 1688 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1689 { 1690 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1691 struct rctl_rule_link *link; 1692 struct sbuf *sb = (struct sbuf *)arg3; 1693 1694 ASSERT_RACCT_ENABLED(); 1695 RACCT_LOCK_ASSERT(); 1696 1697 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1698 if (!rctl_rule_matches(link->rrl_rule, filter)) 1699 continue; 1700 rctl_rule_to_sbuf(sb, link->rrl_rule); 1701 sbuf_printf(sb, ","); 1702 } 1703 } 1704 1705 int 1706 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1707 { 1708 struct sbuf *sb; 1709 struct rctl_rule *filter; 1710 struct rctl_rule_link *link; 1711 struct proc *p; 1712 char *inputstr, *buf; 1713 size_t bufsize; 1714 int error; 1715 1716 if (!racct_enable) 1717 return (ENOSYS); 1718 1719 error = priv_check(td, PRIV_RCTL_GET_RULES); 1720 if (error != 0) 1721 return (error); 1722 1723 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1724 if (error != 0) 1725 return (error); 1726 1727 sx_slock(&allproc_lock); 1728 error = rctl_string_to_rule(inputstr, &filter); 1729 free(inputstr, M_RCTL); 1730 if (error != 0) { 1731 sx_sunlock(&allproc_lock); 1732 return (error); 1733 } 1734 1735 bufsize = uap->outbuflen; 1736 if (bufsize > rctl_maxbufsize) { 1737 sx_sunlock(&allproc_lock); 1738 return (E2BIG); 1739 } 1740 1741 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1742 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1743 KASSERT(sb != NULL, ("sbuf_new failed")); 1744 1745 FOREACH_PROC_IN_SYSTEM(p) { 1746 RACCT_LOCK(); 1747 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1748 /* 1749 * Non-process rules will be added to the buffer later. 1750 * Adding them here would result in duplicated output. 1751 */ 1752 if (link->rrl_rule->rr_subject_type != 1753 RCTL_SUBJECT_TYPE_PROCESS) 1754 continue; 1755 if (!rctl_rule_matches(link->rrl_rule, filter)) 1756 continue; 1757 rctl_rule_to_sbuf(sb, link->rrl_rule); 1758 sbuf_printf(sb, ","); 1759 } 1760 RACCT_UNLOCK(); 1761 } 1762 1763 loginclass_racct_foreach(rctl_get_rules_callback, 1764 rctl_rule_pre_callback, rctl_rule_post_callback, 1765 filter, sb); 1766 ui_racct_foreach(rctl_get_rules_callback, 1767 rctl_rule_pre_callback, rctl_rule_post_callback, 1768 filter, sb); 1769 prison_racct_foreach(rctl_get_rules_callback, 1770 rctl_rule_pre_callback, rctl_rule_post_callback, 1771 filter, sb); 1772 if (sbuf_error(sb) == ENOMEM) { 1773 error = ERANGE; 1774 goto out; 1775 } 1776 1777 /* 1778 * Remove trailing ",". 1779 */ 1780 if (sbuf_len(sb) > 0) 1781 sbuf_setpos(sb, sbuf_len(sb) - 1); 1782 1783 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1784 out: 1785 rctl_rule_release(filter); 1786 sx_sunlock(&allproc_lock); 1787 free(buf, M_RCTL); 1788 return (error); 1789 } 1790 1791 int 1792 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1793 { 1794 struct sbuf *sb; 1795 struct rctl_rule *filter; 1796 struct rctl_rule_link *link; 1797 char *inputstr, *buf; 1798 size_t bufsize; 1799 int error; 1800 1801 if (!racct_enable) 1802 return (ENOSYS); 1803 1804 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1805 if (error != 0) 1806 return (error); 1807 1808 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1809 if (error != 0) 1810 return (error); 1811 1812 sx_slock(&allproc_lock); 1813 error = rctl_string_to_rule(inputstr, &filter); 1814 free(inputstr, M_RCTL); 1815 if (error != 0) { 1816 sx_sunlock(&allproc_lock); 1817 return (error); 1818 } 1819 1820 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1821 rctl_rule_release(filter); 1822 sx_sunlock(&allproc_lock); 1823 return (EINVAL); 1824 } 1825 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1826 rctl_rule_release(filter); 1827 sx_sunlock(&allproc_lock); 1828 return (EOPNOTSUPP); 1829 } 1830 if (filter->rr_subject.rs_proc == NULL) { 1831 rctl_rule_release(filter); 1832 sx_sunlock(&allproc_lock); 1833 return (EINVAL); 1834 } 1835 1836 bufsize = uap->outbuflen; 1837 if (bufsize > rctl_maxbufsize) { 1838 rctl_rule_release(filter); 1839 sx_sunlock(&allproc_lock); 1840 return (E2BIG); 1841 } 1842 1843 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1844 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1845 KASSERT(sb != NULL, ("sbuf_new failed")); 1846 1847 RACCT_LOCK(); 1848 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1849 rrl_next) { 1850 rctl_rule_to_sbuf(sb, link->rrl_rule); 1851 sbuf_printf(sb, ","); 1852 } 1853 RACCT_UNLOCK(); 1854 if (sbuf_error(sb) == ENOMEM) { 1855 error = ERANGE; 1856 sbuf_delete(sb); 1857 goto out; 1858 } 1859 1860 /* 1861 * Remove trailing ",". 1862 */ 1863 if (sbuf_len(sb) > 0) 1864 sbuf_setpos(sb, sbuf_len(sb) - 1); 1865 1866 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1867 out: 1868 rctl_rule_release(filter); 1869 sx_sunlock(&allproc_lock); 1870 free(buf, M_RCTL); 1871 return (error); 1872 } 1873 1874 int 1875 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1876 { 1877 struct rctl_rule *rule; 1878 char *inputstr; 1879 int error; 1880 1881 if (!racct_enable) 1882 return (ENOSYS); 1883 1884 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1885 if (error != 0) 1886 return (error); 1887 1888 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1889 if (error != 0) 1890 return (error); 1891 1892 sx_slock(&allproc_lock); 1893 error = rctl_string_to_rule(inputstr, &rule); 1894 free(inputstr, M_RCTL); 1895 if (error != 0) { 1896 sx_sunlock(&allproc_lock); 1897 return (error); 1898 } 1899 /* 1900 * The 'per' part of a rule is optional. 1901 */ 1902 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1903 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1904 rule->rr_per = rule->rr_subject_type; 1905 1906 if (!rctl_rule_fully_specified(rule)) { 1907 error = EINVAL; 1908 goto out; 1909 } 1910 1911 error = rctl_rule_add(rule); 1912 1913 out: 1914 rctl_rule_release(rule); 1915 sx_sunlock(&allproc_lock); 1916 return (error); 1917 } 1918 1919 int 1920 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1921 { 1922 struct rctl_rule *filter; 1923 char *inputstr; 1924 int error; 1925 1926 if (!racct_enable) 1927 return (ENOSYS); 1928 1929 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1930 if (error != 0) 1931 return (error); 1932 1933 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1934 if (error != 0) 1935 return (error); 1936 1937 sx_slock(&allproc_lock); 1938 error = rctl_string_to_rule(inputstr, &filter); 1939 free(inputstr, M_RCTL); 1940 if (error != 0) { 1941 sx_sunlock(&allproc_lock); 1942 return (error); 1943 } 1944 1945 error = rctl_rule_remove(filter); 1946 rctl_rule_release(filter); 1947 sx_sunlock(&allproc_lock); 1948 1949 return (error); 1950 } 1951 1952 /* 1953 * Update RCTL rule list after credential change. 1954 */ 1955 void 1956 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1957 { 1958 LIST_HEAD(, rctl_rule_link) newrules; 1959 struct rctl_rule_link *link, *newlink; 1960 struct uidinfo *newuip; 1961 struct loginclass *newlc; 1962 struct prison_racct *newprr; 1963 int rulecnt, i; 1964 1965 if (!racct_enable) 1966 return; 1967 1968 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1969 1970 newuip = newcred->cr_ruidinfo; 1971 newlc = newcred->cr_loginclass; 1972 newprr = newcred->cr_prison->pr_prison_racct; 1973 1974 LIST_INIT(&newrules); 1975 1976 again: 1977 /* 1978 * First, count the rules that apply to the process with new 1979 * credentials. 1980 */ 1981 rulecnt = 0; 1982 RACCT_LOCK(); 1983 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1984 if (link->rrl_rule->rr_subject_type == 1985 RCTL_SUBJECT_TYPE_PROCESS) 1986 rulecnt++; 1987 } 1988 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1989 rulecnt++; 1990 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1991 rulecnt++; 1992 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 1993 rulecnt++; 1994 RACCT_UNLOCK(); 1995 1996 /* 1997 * Create temporary list. We've dropped the rctl_lock in order 1998 * to use M_WAITOK. 1999 */ 2000 for (i = 0; i < rulecnt; i++) { 2001 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2002 newlink->rrl_rule = NULL; 2003 newlink->rrl_exceeded = 0; 2004 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2005 } 2006 2007 newlink = LIST_FIRST(&newrules); 2008 2009 /* 2010 * Assign rules to the newly allocated list entries. 2011 */ 2012 RACCT_LOCK(); 2013 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2014 if (link->rrl_rule->rr_subject_type == 2015 RCTL_SUBJECT_TYPE_PROCESS) { 2016 if (newlink == NULL) 2017 goto goaround; 2018 rctl_rule_acquire(link->rrl_rule); 2019 newlink->rrl_rule = link->rrl_rule; 2020 newlink->rrl_exceeded = link->rrl_exceeded; 2021 newlink = LIST_NEXT(newlink, rrl_next); 2022 rulecnt--; 2023 } 2024 } 2025 2026 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2027 if (newlink == NULL) 2028 goto goaround; 2029 rctl_rule_acquire(link->rrl_rule); 2030 newlink->rrl_rule = link->rrl_rule; 2031 newlink->rrl_exceeded = link->rrl_exceeded; 2032 newlink = LIST_NEXT(newlink, rrl_next); 2033 rulecnt--; 2034 } 2035 2036 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2037 if (newlink == NULL) 2038 goto goaround; 2039 rctl_rule_acquire(link->rrl_rule); 2040 newlink->rrl_rule = link->rrl_rule; 2041 newlink->rrl_exceeded = link->rrl_exceeded; 2042 newlink = LIST_NEXT(newlink, rrl_next); 2043 rulecnt--; 2044 } 2045 2046 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2047 if (newlink == NULL) 2048 goto goaround; 2049 rctl_rule_acquire(link->rrl_rule); 2050 newlink->rrl_rule = link->rrl_rule; 2051 newlink->rrl_exceeded = link->rrl_exceeded; 2052 newlink = LIST_NEXT(newlink, rrl_next); 2053 rulecnt--; 2054 } 2055 2056 if (rulecnt == 0) { 2057 /* 2058 * Free the old rule list. 2059 */ 2060 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2061 link = LIST_FIRST(&p->p_racct->r_rule_links); 2062 LIST_REMOVE(link, rrl_next); 2063 rctl_rule_release(link->rrl_rule); 2064 uma_zfree(rctl_rule_link_zone, link); 2065 } 2066 2067 /* 2068 * Replace lists and we're done. 2069 * 2070 * XXX: Is there any way to switch list heads instead 2071 * of iterating here? 2072 */ 2073 while (!LIST_EMPTY(&newrules)) { 2074 newlink = LIST_FIRST(&newrules); 2075 LIST_REMOVE(newlink, rrl_next); 2076 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2077 newlink, rrl_next); 2078 } 2079 2080 RACCT_UNLOCK(); 2081 2082 return; 2083 } 2084 2085 goaround: 2086 RACCT_UNLOCK(); 2087 2088 /* 2089 * Rule list changed while we were not holding the rctl_lock. 2090 * Free the new list and try again. 2091 */ 2092 while (!LIST_EMPTY(&newrules)) { 2093 newlink = LIST_FIRST(&newrules); 2094 LIST_REMOVE(newlink, rrl_next); 2095 if (newlink->rrl_rule != NULL) 2096 rctl_rule_release(newlink->rrl_rule); 2097 uma_zfree(rctl_rule_link_zone, newlink); 2098 } 2099 2100 goto again; 2101 } 2102 2103 /* 2104 * Assign RCTL rules to the newly created process. 2105 */ 2106 int 2107 rctl_proc_fork(struct proc *parent, struct proc *child) 2108 { 2109 struct rctl_rule *rule; 2110 struct rctl_rule_link *link; 2111 int error; 2112 2113 ASSERT_RACCT_ENABLED(); 2114 RACCT_LOCK_ASSERT(); 2115 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2116 2117 LIST_INIT(&child->p_racct->r_rule_links); 2118 2119 /* 2120 * Go through limits applicable to the parent and assign them 2121 * to the child. Rules with 'process' subject have to be duplicated 2122 * in order to make their rr_subject point to the new process. 2123 */ 2124 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2125 if (link->rrl_rule->rr_subject_type == 2126 RCTL_SUBJECT_TYPE_PROCESS) { 2127 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2128 if (rule == NULL) 2129 goto fail; 2130 KASSERT(rule->rr_subject.rs_proc == parent, 2131 ("rule->rr_subject.rs_proc != parent")); 2132 rule->rr_subject.rs_proc = child; 2133 error = rctl_racct_add_rule_locked(child->p_racct, 2134 rule); 2135 rctl_rule_release(rule); 2136 if (error != 0) 2137 goto fail; 2138 } else { 2139 error = rctl_racct_add_rule_locked(child->p_racct, 2140 link->rrl_rule); 2141 if (error != 0) 2142 goto fail; 2143 } 2144 } 2145 2146 return (0); 2147 2148 fail: 2149 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2150 link = LIST_FIRST(&child->p_racct->r_rule_links); 2151 LIST_REMOVE(link, rrl_next); 2152 rctl_rule_release(link->rrl_rule); 2153 uma_zfree(rctl_rule_link_zone, link); 2154 } 2155 2156 return (EAGAIN); 2157 } 2158 2159 /* 2160 * Release rules attached to the racct. 2161 */ 2162 void 2163 rctl_racct_release(struct racct *racct) 2164 { 2165 struct rctl_rule_link *link; 2166 2167 ASSERT_RACCT_ENABLED(); 2168 RACCT_LOCK_ASSERT(); 2169 2170 while (!LIST_EMPTY(&racct->r_rule_links)) { 2171 link = LIST_FIRST(&racct->r_rule_links); 2172 LIST_REMOVE(link, rrl_next); 2173 rctl_rule_release(link->rrl_rule); 2174 uma_zfree(rctl_rule_link_zone, link); 2175 } 2176 } 2177 2178 static void 2179 rctl_init(void) 2180 { 2181 2182 if (!racct_enable) 2183 return; 2184 2185 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2186 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2187 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2188 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2189 UMA_ALIGN_PTR, 0); 2190 2191 /* 2192 * Set default values, making sure not to overwrite the ones 2193 * fetched from tunables. Most of those could be set at the 2194 * declaration, except for the rctl_throttle_max - we cannot 2195 * set it there due to hz not being compile time constant. 2196 */ 2197 if (rctl_throttle_min < 1) 2198 rctl_throttle_min = 1; 2199 if (rctl_throttle_max < rctl_throttle_min) 2200 rctl_throttle_max = 2 * hz; 2201 if (rctl_throttle_pct < 0) 2202 rctl_throttle_pct = 100; 2203 if (rctl_throttle_pct2 < 0) 2204 rctl_throttle_pct2 = 100; 2205 } 2206 2207 #else /* !RCTL */ 2208 2209 int 2210 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2211 { 2212 2213 return (ENOSYS); 2214 } 2215 2216 int 2217 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2218 { 2219 2220 return (ENOSYS); 2221 } 2222 2223 int 2224 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2225 { 2226 2227 return (ENOSYS); 2228 } 2229 2230 int 2231 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2232 { 2233 2234 return (ENOSYS); 2235 } 2236 2237 int 2238 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2239 { 2240 2241 return (ENOSYS); 2242 } 2243 2244 #endif /* !RCTL */ 2245