1 /*- 2 * Copyright (c) 2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/malloc.h> 38 #include <sys/queue.h> 39 #include <sys/refcount.h> 40 #include <sys/jail.h> 41 #include <sys/kernel.h> 42 #include <sys/limits.h> 43 #include <sys/loginclass.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/racct.h> 47 #include <sys/rctl.h> 48 #include <sys/resourcevar.h> 49 #include <sys/sx.h> 50 #include <sys/sysent.h> 51 #include <sys/sysproto.h> 52 #include <sys/systm.h> 53 #include <sys/types.h> 54 #include <sys/eventhandler.h> 55 #include <sys/lock.h> 56 #include <sys/mutex.h> 57 #include <sys/rwlock.h> 58 #include <sys/sbuf.h> 59 #include <sys/taskqueue.h> 60 #include <sys/tree.h> 61 #include <vm/uma.h> 62 63 #ifdef RCTL 64 #ifndef RACCT 65 #error "The RCTL option requires the RACCT option" 66 #endif 67 68 FEATURE(rctl, "Resource Limits"); 69 70 #define HRF_DEFAULT 0 71 #define HRF_DONT_INHERIT 1 72 #define HRF_DONT_ACCUMULATE 2 73 74 #define RCTL_MAX_INBUFSIZE 4 * 1024 75 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 76 #define RCTL_LOG_BUFSIZE 128 77 78 #define RCTL_PCPU_SHIFT (10 * 1000000) 79 80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 81 static int rctl_log_rate_limit = 10; 82 static int rctl_devctl_rate_limit = 10; 83 84 /* 85 * Values below are initialized in rctl_init(). 86 */ 87 static int rctl_throttle_min = -1; 88 static int rctl_throttle_max = -1; 89 static int rctl_throttle_pct = -1; 90 static int rctl_throttle_pct2 = -1; 91 92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 96 97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits"); 98 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 99 &rctl_maxbufsize, 0, "Maximum output buffer size"); 100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 101 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 103 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 104 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 105 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU", 106 "Shortest throttling duration, in hz"); 107 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 108 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 109 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU", 110 "Longest throttling duration, in hz"); 111 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 113 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU", 114 "Throttling penalty for process consumption, in percent"); 115 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 117 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU", 118 "Throttling penalty for container consumption, in percent"); 119 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 120 121 /* 122 * 'rctl_rule_link' connects a rule with every racct it's related to. 123 * For example, rule 'user:X:openfiles:deny=N/process' is linked 124 * with uidinfo for user X, and to each process of that user. 125 */ 126 struct rctl_rule_link { 127 LIST_ENTRY(rctl_rule_link) rrl_next; 128 struct rctl_rule *rrl_rule; 129 int rrl_exceeded; 130 }; 131 132 struct dict { 133 const char *d_name; 134 int d_value; 135 }; 136 137 static struct dict subjectnames[] = { 138 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 139 { "user", RCTL_SUBJECT_TYPE_USER }, 140 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 141 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 142 { NULL, -1 }}; 143 144 static struct dict resourcenames[] = { 145 { "cputime", RACCT_CPU }, 146 { "datasize", RACCT_DATA }, 147 { "stacksize", RACCT_STACK }, 148 { "coredumpsize", RACCT_CORE }, 149 { "memoryuse", RACCT_RSS }, 150 { "memorylocked", RACCT_MEMLOCK }, 151 { "maxproc", RACCT_NPROC }, 152 { "openfiles", RACCT_NOFILE }, 153 { "vmemoryuse", RACCT_VMEM }, 154 { "pseudoterminals", RACCT_NPTS }, 155 { "swapuse", RACCT_SWAP }, 156 { "nthr", RACCT_NTHR }, 157 { "msgqqueued", RACCT_MSGQQUEUED }, 158 { "msgqsize", RACCT_MSGQSIZE }, 159 { "nmsgq", RACCT_NMSGQ }, 160 { "nsem", RACCT_NSEM }, 161 { "nsemop", RACCT_NSEMOP }, 162 { "nshm", RACCT_NSHM }, 163 { "shmsize", RACCT_SHMSIZE }, 164 { "wallclock", RACCT_WALLCLOCK }, 165 { "pcpu", RACCT_PCTCPU }, 166 { "readbps", RACCT_READBPS }, 167 { "writebps", RACCT_WRITEBPS }, 168 { "readiops", RACCT_READIOPS }, 169 { "writeiops", RACCT_WRITEIOPS }, 170 { NULL, -1 }}; 171 172 static struct dict actionnames[] = { 173 { "sighup", RCTL_ACTION_SIGHUP }, 174 { "sigint", RCTL_ACTION_SIGINT }, 175 { "sigquit", RCTL_ACTION_SIGQUIT }, 176 { "sigill", RCTL_ACTION_SIGILL }, 177 { "sigtrap", RCTL_ACTION_SIGTRAP }, 178 { "sigabrt", RCTL_ACTION_SIGABRT }, 179 { "sigemt", RCTL_ACTION_SIGEMT }, 180 { "sigfpe", RCTL_ACTION_SIGFPE }, 181 { "sigkill", RCTL_ACTION_SIGKILL }, 182 { "sigbus", RCTL_ACTION_SIGBUS }, 183 { "sigsegv", RCTL_ACTION_SIGSEGV }, 184 { "sigsys", RCTL_ACTION_SIGSYS }, 185 { "sigpipe", RCTL_ACTION_SIGPIPE }, 186 { "sigalrm", RCTL_ACTION_SIGALRM }, 187 { "sigterm", RCTL_ACTION_SIGTERM }, 188 { "sigurg", RCTL_ACTION_SIGURG }, 189 { "sigstop", RCTL_ACTION_SIGSTOP }, 190 { "sigtstp", RCTL_ACTION_SIGTSTP }, 191 { "sigchld", RCTL_ACTION_SIGCHLD }, 192 { "sigttin", RCTL_ACTION_SIGTTIN }, 193 { "sigttou", RCTL_ACTION_SIGTTOU }, 194 { "sigio", RCTL_ACTION_SIGIO }, 195 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 196 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 197 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 198 { "sigprof", RCTL_ACTION_SIGPROF }, 199 { "sigwinch", RCTL_ACTION_SIGWINCH }, 200 { "siginfo", RCTL_ACTION_SIGINFO }, 201 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 202 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 203 { "sigthr", RCTL_ACTION_SIGTHR }, 204 { "deny", RCTL_ACTION_DENY }, 205 { "log", RCTL_ACTION_LOG }, 206 { "devctl", RCTL_ACTION_DEVCTL }, 207 { "throttle", RCTL_ACTION_THROTTLE }, 208 { NULL, -1 }}; 209 210 static void rctl_init(void); 211 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 212 213 static uma_zone_t rctl_rule_zone; 214 static uma_zone_t rctl_rule_link_zone; 215 static struct rwlock rctl_lock; 216 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock"); 217 218 #define RCTL_RLOCK() rw_rlock(&rctl_lock) 219 #define RCTL_RUNLOCK() rw_runlock(&rctl_lock) 220 #define RCTL_WLOCK() rw_wlock(&rctl_lock) 221 #define RCTL_WUNLOCK() rw_wunlock(&rctl_lock) 222 #define RCTL_LOCK_ASSERT() rw_assert(&rctl_lock, RA_LOCKED) 223 #define RCTL_WLOCK_ASSERT() rw_assert(&rctl_lock, RA_WLOCKED) 224 225 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 226 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 227 228 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 229 230 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 231 { 232 int error, val = rctl_throttle_min; 233 234 error = sysctl_handle_int(oidp, &val, 0, req); 235 if (error || !req->newptr) 236 return (error); 237 if (val < 1 || val > rctl_throttle_max) 238 return (EINVAL); 239 240 RCTL_WLOCK(); 241 rctl_throttle_min = val; 242 RCTL_WUNLOCK(); 243 244 return (0); 245 } 246 247 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 248 { 249 int error, val = rctl_throttle_max; 250 251 error = sysctl_handle_int(oidp, &val, 0, req); 252 if (error || !req->newptr) 253 return (error); 254 if (val < rctl_throttle_min) 255 return (EINVAL); 256 257 RCTL_WLOCK(); 258 rctl_throttle_max = val; 259 RCTL_WUNLOCK(); 260 261 return (0); 262 } 263 264 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 265 { 266 int error, val = rctl_throttle_pct; 267 268 error = sysctl_handle_int(oidp, &val, 0, req); 269 if (error || !req->newptr) 270 return (error); 271 if (val < 0) 272 return (EINVAL); 273 274 RCTL_WLOCK(); 275 rctl_throttle_pct = val; 276 RCTL_WUNLOCK(); 277 278 return (0); 279 } 280 281 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 282 { 283 int error, val = rctl_throttle_pct2; 284 285 error = sysctl_handle_int(oidp, &val, 0, req); 286 if (error || !req->newptr) 287 return (error); 288 if (val < 0) 289 return (EINVAL); 290 291 RCTL_WLOCK(); 292 rctl_throttle_pct2 = val; 293 RCTL_WUNLOCK(); 294 295 return (0); 296 } 297 298 static const char * 299 rctl_subject_type_name(int subject) 300 { 301 int i; 302 303 for (i = 0; subjectnames[i].d_name != NULL; i++) { 304 if (subjectnames[i].d_value == subject) 305 return (subjectnames[i].d_name); 306 } 307 308 panic("rctl_subject_type_name: unknown subject type %d", subject); 309 } 310 311 static const char * 312 rctl_action_name(int action) 313 { 314 int i; 315 316 for (i = 0; actionnames[i].d_name != NULL; i++) { 317 if (actionnames[i].d_value == action) 318 return (actionnames[i].d_name); 319 } 320 321 panic("rctl_action_name: unknown action %d", action); 322 } 323 324 const char * 325 rctl_resource_name(int resource) 326 { 327 int i; 328 329 for (i = 0; resourcenames[i].d_name != NULL; i++) { 330 if (resourcenames[i].d_value == resource) 331 return (resourcenames[i].d_name); 332 } 333 334 panic("rctl_resource_name: unknown resource %d", resource); 335 } 336 337 static struct racct * 338 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 339 { 340 struct ucred *cred = p->p_ucred; 341 342 ASSERT_RACCT_ENABLED(); 343 RCTL_LOCK_ASSERT(); 344 345 switch (rule->rr_per) { 346 case RCTL_SUBJECT_TYPE_PROCESS: 347 return (p->p_racct); 348 case RCTL_SUBJECT_TYPE_USER: 349 return (cred->cr_ruidinfo->ui_racct); 350 case RCTL_SUBJECT_TYPE_LOGINCLASS: 351 return (cred->cr_loginclass->lc_racct); 352 case RCTL_SUBJECT_TYPE_JAIL: 353 return (cred->cr_prison->pr_prison_racct->prr_racct); 354 default: 355 panic("%s: unknown per %d", __func__, rule->rr_per); 356 } 357 } 358 359 /* 360 * Return the amount of resource that can be allocated by 'p' before 361 * hitting 'rule'. 362 */ 363 static int64_t 364 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 365 { 366 const struct racct *racct; 367 int64_t available; 368 369 ASSERT_RACCT_ENABLED(); 370 RCTL_LOCK_ASSERT(); 371 372 racct = rctl_proc_rule_to_racct(p, rule); 373 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 374 375 return (available); 376 } 377 378 /* 379 * Called every second for proc, uidinfo, loginclass, and jail containers. 380 * If the limit isn't exceeded, it decreases the usage amount to zero. 381 * Otherwise, it decreases it by the value of the limit. This way 382 * resource consumption exceeding the limit "carries over" to the next 383 * period. 384 */ 385 void 386 rctl_throttle_decay(struct racct *racct, int resource) 387 { 388 struct rctl_rule *rule; 389 struct rctl_rule_link *link; 390 int64_t minavailable; 391 392 ASSERT_RACCT_ENABLED(); 393 394 minavailable = INT64_MAX; 395 396 RCTL_RLOCK(); 397 398 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 399 rule = link->rrl_rule; 400 401 if (rule->rr_resource != resource) 402 continue; 403 if (rule->rr_action != RCTL_ACTION_THROTTLE) 404 continue; 405 406 if (rule->rr_amount < minavailable) 407 minavailable = rule->rr_amount; 408 } 409 410 RCTL_RUNLOCK(); 411 412 if (racct->r_resources[resource] < minavailable) { 413 racct->r_resources[resource] = 0; 414 } else { 415 /* 416 * Cap utilization counter at ten times the limit. Otherwise, 417 * if we changed the rule lowering the allowed amount, it could 418 * take unreasonably long time for the accumulated resource 419 * usage to drop. 420 */ 421 if (racct->r_resources[resource] > minavailable * 10) 422 racct->r_resources[resource] = minavailable * 10; 423 424 racct->r_resources[resource] -= minavailable; 425 } 426 } 427 428 /* 429 * Special version of rctl_get_available() for the %CPU resource. 430 * We slightly cheat here and return less than we normally would. 431 */ 432 int64_t 433 rctl_pcpu_available(const struct proc *p) { 434 struct rctl_rule *rule; 435 struct rctl_rule_link *link; 436 int64_t available, minavailable, limit; 437 438 ASSERT_RACCT_ENABLED(); 439 440 minavailable = INT64_MAX; 441 limit = 0; 442 443 RCTL_RLOCK(); 444 445 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 446 rule = link->rrl_rule; 447 if (rule->rr_resource != RACCT_PCTCPU) 448 continue; 449 if (rule->rr_action != RCTL_ACTION_DENY) 450 continue; 451 available = rctl_available_resource(p, rule); 452 if (available < minavailable) { 453 minavailable = available; 454 limit = rule->rr_amount; 455 } 456 } 457 458 RCTL_RUNLOCK(); 459 460 /* 461 * Return slightly less than actual value of the available 462 * %cpu resource. This makes %cpu throttling more agressive 463 * and lets us act sooner than the limits are already exceeded. 464 */ 465 if (limit != 0) { 466 if (limit > 2 * RCTL_PCPU_SHIFT) 467 minavailable -= RCTL_PCPU_SHIFT; 468 else 469 minavailable -= (limit / 2); 470 } 471 472 return (minavailable); 473 } 474 475 static uint64_t 476 xadd(uint64_t a, uint64_t b) 477 { 478 uint64_t c; 479 480 c = a + b; 481 482 /* 483 * Detect overflow. 484 */ 485 if (c < a || c < b) 486 return (UINT64_MAX); 487 488 return (c); 489 } 490 491 static uint64_t 492 xmul(uint64_t a, uint64_t b) 493 { 494 495 if (b != 0 && a > UINT64_MAX / b) 496 return (UINT64_MAX); 497 498 return (a * b); 499 } 500 501 /* 502 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 503 * to what it keeps allocated now. Returns non-zero if the allocation should 504 * be denied, 0 otherwise. 505 */ 506 int 507 rctl_enforce(struct proc *p, int resource, uint64_t amount) 508 { 509 static struct timeval log_lasttime, devctl_lasttime; 510 static int log_curtime = 0, devctl_curtime = 0; 511 struct rctl_rule *rule; 512 struct rctl_rule_link *link; 513 struct sbuf sb; 514 char *buf; 515 int64_t available; 516 uint64_t sleep_ms, sleep_ratio; 517 int should_deny = 0; 518 519 520 ASSERT_RACCT_ENABLED(); 521 522 RCTL_RLOCK(); 523 524 /* 525 * There may be more than one matching rule; go through all of them. 526 * Denial should be done last, after logging and sending signals. 527 */ 528 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 529 rule = link->rrl_rule; 530 if (rule->rr_resource != resource) 531 continue; 532 533 available = rctl_available_resource(p, rule); 534 if (available >= (int64_t)amount) { 535 link->rrl_exceeded = 0; 536 continue; 537 } 538 539 switch (rule->rr_action) { 540 case RCTL_ACTION_DENY: 541 should_deny = 1; 542 continue; 543 case RCTL_ACTION_LOG: 544 /* 545 * If rrl_exceeded != 0, it means we've already 546 * logged a warning for this process. 547 */ 548 if (link->rrl_exceeded != 0) 549 continue; 550 551 /* 552 * If the process state is not fully initialized yet, 553 * we can't access most of the required fields, e.g. 554 * p->p_comm. This happens when called from fork1(). 555 * Ignore this rule for now; it will be processed just 556 * after fork, when called from racct_proc_fork_done(). 557 */ 558 if (p->p_state != PRS_NORMAL) 559 continue; 560 561 if (!ppsratecheck(&log_lasttime, &log_curtime, 562 rctl_log_rate_limit)) 563 continue; 564 565 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 566 if (buf == NULL) { 567 printf("rctl_enforce: out of memory\n"); 568 continue; 569 } 570 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 571 rctl_rule_to_sbuf(&sb, rule); 572 sbuf_finish(&sb); 573 printf("rctl: rule \"%s\" matched by pid %d " 574 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 575 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 576 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 577 sbuf_delete(&sb); 578 free(buf, M_RCTL); 579 link->rrl_exceeded = 1; 580 continue; 581 case RCTL_ACTION_DEVCTL: 582 if (link->rrl_exceeded != 0) 583 continue; 584 585 if (p->p_state != PRS_NORMAL) 586 continue; 587 588 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 589 rctl_devctl_rate_limit)) 590 continue; 591 592 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 593 if (buf == NULL) { 594 printf("rctl_enforce: out of memory\n"); 595 continue; 596 } 597 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 598 sbuf_printf(&sb, "rule="); 599 rctl_rule_to_sbuf(&sb, rule); 600 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 601 p->p_pid, p->p_ucred->cr_ruid, 602 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 603 sbuf_finish(&sb); 604 devctl_notify_f("RCTL", "rule", "matched", 605 sbuf_data(&sb), M_NOWAIT); 606 sbuf_delete(&sb); 607 free(buf, M_RCTL); 608 link->rrl_exceeded = 1; 609 continue; 610 case RCTL_ACTION_THROTTLE: 611 if (p->p_state != PRS_NORMAL) 612 continue; 613 614 /* 615 * Make the process sleep for a fraction of second 616 * proportional to the ratio of process' resource 617 * utilization compared to the limit. The point is 618 * to penalize resource hogs: processes that consume 619 * more of the available resources sleep for longer. 620 * 621 * We're trying to defer division until the very end, 622 * to minimize the rounding effects. The following 623 * calculation could have been written in a clearer 624 * way like this: 625 * 626 * sleep_ms = hz * p->p_racct->r_resources[resource] / 627 * rule->rr_amount; 628 * sleep_ms *= rctl_throttle_pct / 100; 629 * if (sleep_ms < rctl_throttle_min) 630 * sleep_ms = rctl_throttle_min; 631 * 632 */ 633 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 634 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 635 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 636 sleep_ms = rctl_throttle_min * rule->rr_amount; 637 638 /* 639 * Multiply that by the ratio of the resource 640 * consumption for the container compared to the limit, 641 * squared. In other words, a process in a container 642 * that is two times over the limit will be throttled 643 * four times as much for hitting the same rule. The 644 * point is to penalize processes more if the container 645 * itself (eg certain UID or jail) is above the limit. 646 */ 647 if (available < 0) 648 sleep_ratio = -available / rule->rr_amount; 649 else 650 sleep_ratio = 0; 651 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 652 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 653 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 654 655 /* 656 * Finally the division. 657 */ 658 sleep_ms /= rule->rr_amount; 659 660 if (sleep_ms > rctl_throttle_max) 661 sleep_ms = rctl_throttle_max; 662 #if 0 663 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ld ms (ratio %ld, available %ld)\n", 664 __func__, p->p_pid, p->p_comm, 665 p->p_racct->r_resources[resource], 666 rule->rr_amount, sleep_ms, sleep_ratio, available); 667 #endif 668 669 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 670 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 671 racct_proc_throttle(p, sleep_ms); 672 continue; 673 default: 674 if (link->rrl_exceeded != 0) 675 continue; 676 677 if (p->p_state != PRS_NORMAL) 678 continue; 679 680 KASSERT(rule->rr_action > 0 && 681 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 682 ("rctl_enforce: unknown action %d", 683 rule->rr_action)); 684 685 /* 686 * We're using the fact that RCTL_ACTION_SIG* values 687 * are equal to their counterparts from sys/signal.h. 688 */ 689 kern_psignal(p, rule->rr_action); 690 link->rrl_exceeded = 1; 691 continue; 692 } 693 } 694 695 RCTL_RUNLOCK(); 696 697 if (should_deny) { 698 /* 699 * Return fake error code; the caller should change it 700 * into one proper for the situation - EFSIZ, ENOMEM etc. 701 */ 702 return (EDOOFUS); 703 } 704 705 return (0); 706 } 707 708 uint64_t 709 rctl_get_limit(struct proc *p, int resource) 710 { 711 struct rctl_rule *rule; 712 struct rctl_rule_link *link; 713 uint64_t amount = UINT64_MAX; 714 715 ASSERT_RACCT_ENABLED(); 716 717 RCTL_RLOCK(); 718 719 /* 720 * There may be more than one matching rule; go through all of them. 721 * Denial should be done last, after logging and sending signals. 722 */ 723 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 724 rule = link->rrl_rule; 725 if (rule->rr_resource != resource) 726 continue; 727 if (rule->rr_action != RCTL_ACTION_DENY) 728 continue; 729 if (rule->rr_amount < amount) 730 amount = rule->rr_amount; 731 } 732 733 RCTL_RUNLOCK(); 734 735 return (amount); 736 } 737 738 uint64_t 739 rctl_get_available(struct proc *p, int resource) 740 { 741 struct rctl_rule *rule; 742 struct rctl_rule_link *link; 743 int64_t available, minavailable, allocated; 744 745 minavailable = INT64_MAX; 746 747 ASSERT_RACCT_ENABLED(); 748 749 RCTL_RLOCK(); 750 751 /* 752 * There may be more than one matching rule; go through all of them. 753 * Denial should be done last, after logging and sending signals. 754 */ 755 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 756 rule = link->rrl_rule; 757 if (rule->rr_resource != resource) 758 continue; 759 if (rule->rr_action != RCTL_ACTION_DENY) 760 continue; 761 available = rctl_available_resource(p, rule); 762 if (available < minavailable) 763 minavailable = available; 764 } 765 766 RCTL_RUNLOCK(); 767 768 /* 769 * XXX: Think about this _hard_. 770 */ 771 allocated = p->p_racct->r_resources[resource]; 772 if (minavailable < INT64_MAX - allocated) 773 minavailable += allocated; 774 if (minavailable < 0) 775 minavailable = 0; 776 return (minavailable); 777 } 778 779 static int 780 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 781 { 782 783 ASSERT_RACCT_ENABLED(); 784 785 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 786 if (rule->rr_subject_type != filter->rr_subject_type) 787 return (0); 788 789 switch (filter->rr_subject_type) { 790 case RCTL_SUBJECT_TYPE_PROCESS: 791 if (filter->rr_subject.rs_proc != NULL && 792 rule->rr_subject.rs_proc != 793 filter->rr_subject.rs_proc) 794 return (0); 795 break; 796 case RCTL_SUBJECT_TYPE_USER: 797 if (filter->rr_subject.rs_uip != NULL && 798 rule->rr_subject.rs_uip != 799 filter->rr_subject.rs_uip) 800 return (0); 801 break; 802 case RCTL_SUBJECT_TYPE_LOGINCLASS: 803 if (filter->rr_subject.rs_loginclass != NULL && 804 rule->rr_subject.rs_loginclass != 805 filter->rr_subject.rs_loginclass) 806 return (0); 807 break; 808 case RCTL_SUBJECT_TYPE_JAIL: 809 if (filter->rr_subject.rs_prison_racct != NULL && 810 rule->rr_subject.rs_prison_racct != 811 filter->rr_subject.rs_prison_racct) 812 return (0); 813 break; 814 default: 815 panic("rctl_rule_matches: unknown subject type %d", 816 filter->rr_subject_type); 817 } 818 } 819 820 if (filter->rr_resource != RACCT_UNDEFINED) { 821 if (rule->rr_resource != filter->rr_resource) 822 return (0); 823 } 824 825 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 826 if (rule->rr_action != filter->rr_action) 827 return (0); 828 } 829 830 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 831 if (rule->rr_amount != filter->rr_amount) 832 return (0); 833 } 834 835 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 836 if (rule->rr_per != filter->rr_per) 837 return (0); 838 } 839 840 return (1); 841 } 842 843 static int 844 str2value(const char *str, int *value, struct dict *table) 845 { 846 int i; 847 848 if (value == NULL) 849 return (EINVAL); 850 851 for (i = 0; table[i].d_name != NULL; i++) { 852 if (strcasecmp(table[i].d_name, str) == 0) { 853 *value = table[i].d_value; 854 return (0); 855 } 856 } 857 858 return (EINVAL); 859 } 860 861 static int 862 str2id(const char *str, id_t *value) 863 { 864 char *end; 865 866 if (str == NULL) 867 return (EINVAL); 868 869 *value = strtoul(str, &end, 10); 870 if ((size_t)(end - str) != strlen(str)) 871 return (EINVAL); 872 873 return (0); 874 } 875 876 static int 877 str2int64(const char *str, int64_t *value) 878 { 879 char *end; 880 881 if (str == NULL) 882 return (EINVAL); 883 884 *value = strtoul(str, &end, 10); 885 if ((size_t)(end - str) != strlen(str)) 886 return (EINVAL); 887 888 if (*value < 0) 889 return (ERANGE); 890 891 return (0); 892 } 893 894 /* 895 * Connect the rule to the racct, increasing refcount for the rule. 896 */ 897 static void 898 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 899 { 900 struct rctl_rule_link *link; 901 902 ASSERT_RACCT_ENABLED(); 903 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 904 905 rctl_rule_acquire(rule); 906 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 907 link->rrl_rule = rule; 908 link->rrl_exceeded = 0; 909 910 RCTL_WLOCK(); 911 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 912 RCTL_WUNLOCK(); 913 } 914 915 static int 916 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 917 { 918 struct rctl_rule_link *link; 919 920 ASSERT_RACCT_ENABLED(); 921 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 922 RCTL_WLOCK_ASSERT(); 923 924 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 925 if (link == NULL) 926 return (ENOMEM); 927 rctl_rule_acquire(rule); 928 link->rrl_rule = rule; 929 link->rrl_exceeded = 0; 930 931 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 932 return (0); 933 } 934 935 /* 936 * Remove limits for a rules matching the filter and release 937 * the refcounts for the rules, possibly freeing them. Returns 938 * the number of limit structures removed. 939 */ 940 static int 941 rctl_racct_remove_rules(struct racct *racct, 942 const struct rctl_rule *filter) 943 { 944 struct rctl_rule_link *link, *linktmp; 945 int removed = 0; 946 947 ASSERT_RACCT_ENABLED(); 948 RCTL_WLOCK_ASSERT(); 949 950 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 951 if (!rctl_rule_matches(link->rrl_rule, filter)) 952 continue; 953 954 LIST_REMOVE(link, rrl_next); 955 rctl_rule_release(link->rrl_rule); 956 uma_zfree(rctl_rule_link_zone, link); 957 removed++; 958 } 959 return (removed); 960 } 961 962 static void 963 rctl_rule_acquire_subject(struct rctl_rule *rule) 964 { 965 966 ASSERT_RACCT_ENABLED(); 967 968 switch (rule->rr_subject_type) { 969 case RCTL_SUBJECT_TYPE_UNDEFINED: 970 case RCTL_SUBJECT_TYPE_PROCESS: 971 break; 972 case RCTL_SUBJECT_TYPE_JAIL: 973 if (rule->rr_subject.rs_prison_racct != NULL) 974 prison_racct_hold(rule->rr_subject.rs_prison_racct); 975 break; 976 case RCTL_SUBJECT_TYPE_USER: 977 if (rule->rr_subject.rs_uip != NULL) 978 uihold(rule->rr_subject.rs_uip); 979 break; 980 case RCTL_SUBJECT_TYPE_LOGINCLASS: 981 if (rule->rr_subject.rs_loginclass != NULL) 982 loginclass_hold(rule->rr_subject.rs_loginclass); 983 break; 984 default: 985 panic("rctl_rule_acquire_subject: unknown subject type %d", 986 rule->rr_subject_type); 987 } 988 } 989 990 static void 991 rctl_rule_release_subject(struct rctl_rule *rule) 992 { 993 994 ASSERT_RACCT_ENABLED(); 995 996 switch (rule->rr_subject_type) { 997 case RCTL_SUBJECT_TYPE_UNDEFINED: 998 case RCTL_SUBJECT_TYPE_PROCESS: 999 break; 1000 case RCTL_SUBJECT_TYPE_JAIL: 1001 if (rule->rr_subject.rs_prison_racct != NULL) 1002 prison_racct_free(rule->rr_subject.rs_prison_racct); 1003 break; 1004 case RCTL_SUBJECT_TYPE_USER: 1005 if (rule->rr_subject.rs_uip != NULL) 1006 uifree(rule->rr_subject.rs_uip); 1007 break; 1008 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1009 if (rule->rr_subject.rs_loginclass != NULL) 1010 loginclass_free(rule->rr_subject.rs_loginclass); 1011 break; 1012 default: 1013 panic("rctl_rule_release_subject: unknown subject type %d", 1014 rule->rr_subject_type); 1015 } 1016 } 1017 1018 struct rctl_rule * 1019 rctl_rule_alloc(int flags) 1020 { 1021 struct rctl_rule *rule; 1022 1023 ASSERT_RACCT_ENABLED(); 1024 1025 rule = uma_zalloc(rctl_rule_zone, flags); 1026 if (rule == NULL) 1027 return (NULL); 1028 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1029 rule->rr_subject.rs_proc = NULL; 1030 rule->rr_subject.rs_uip = NULL; 1031 rule->rr_subject.rs_loginclass = NULL; 1032 rule->rr_subject.rs_prison_racct = NULL; 1033 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1034 rule->rr_resource = RACCT_UNDEFINED; 1035 rule->rr_action = RCTL_ACTION_UNDEFINED; 1036 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1037 refcount_init(&rule->rr_refcount, 1); 1038 1039 return (rule); 1040 } 1041 1042 struct rctl_rule * 1043 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1044 { 1045 struct rctl_rule *copy; 1046 1047 ASSERT_RACCT_ENABLED(); 1048 1049 copy = uma_zalloc(rctl_rule_zone, flags); 1050 if (copy == NULL) 1051 return (NULL); 1052 copy->rr_subject_type = rule->rr_subject_type; 1053 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1054 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1055 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1056 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1057 copy->rr_per = rule->rr_per; 1058 copy->rr_resource = rule->rr_resource; 1059 copy->rr_action = rule->rr_action; 1060 copy->rr_amount = rule->rr_amount; 1061 refcount_init(©->rr_refcount, 1); 1062 rctl_rule_acquire_subject(copy); 1063 1064 return (copy); 1065 } 1066 1067 void 1068 rctl_rule_acquire(struct rctl_rule *rule) 1069 { 1070 1071 ASSERT_RACCT_ENABLED(); 1072 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1073 1074 refcount_acquire(&rule->rr_refcount); 1075 } 1076 1077 static void 1078 rctl_rule_free(void *context, int pending) 1079 { 1080 struct rctl_rule *rule; 1081 1082 rule = (struct rctl_rule *)context; 1083 1084 ASSERT_RACCT_ENABLED(); 1085 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1086 1087 /* 1088 * We don't need locking here; rule is guaranteed to be inaccessible. 1089 */ 1090 1091 rctl_rule_release_subject(rule); 1092 uma_zfree(rctl_rule_zone, rule); 1093 } 1094 1095 void 1096 rctl_rule_release(struct rctl_rule *rule) 1097 { 1098 1099 ASSERT_RACCT_ENABLED(); 1100 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1101 1102 if (refcount_release(&rule->rr_refcount)) { 1103 /* 1104 * rctl_rule_release() is often called when iterating 1105 * over all the uidinfo structures in the system, 1106 * holding uihashtbl_lock. Since rctl_rule_free() 1107 * might end up calling uifree(), this would lead 1108 * to lock recursion. Use taskqueue to avoid this. 1109 */ 1110 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1111 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1112 } 1113 } 1114 1115 static int 1116 rctl_rule_fully_specified(const struct rctl_rule *rule) 1117 { 1118 1119 ASSERT_RACCT_ENABLED(); 1120 1121 switch (rule->rr_subject_type) { 1122 case RCTL_SUBJECT_TYPE_UNDEFINED: 1123 return (0); 1124 case RCTL_SUBJECT_TYPE_PROCESS: 1125 if (rule->rr_subject.rs_proc == NULL) 1126 return (0); 1127 break; 1128 case RCTL_SUBJECT_TYPE_USER: 1129 if (rule->rr_subject.rs_uip == NULL) 1130 return (0); 1131 break; 1132 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1133 if (rule->rr_subject.rs_loginclass == NULL) 1134 return (0); 1135 break; 1136 case RCTL_SUBJECT_TYPE_JAIL: 1137 if (rule->rr_subject.rs_prison_racct == NULL) 1138 return (0); 1139 break; 1140 default: 1141 panic("rctl_rule_fully_specified: unknown subject type %d", 1142 rule->rr_subject_type); 1143 } 1144 if (rule->rr_resource == RACCT_UNDEFINED) 1145 return (0); 1146 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1147 return (0); 1148 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1149 return (0); 1150 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1151 return (0); 1152 1153 return (1); 1154 } 1155 1156 static int 1157 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1158 { 1159 struct rctl_rule *rule; 1160 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1161 *amountstr, *perstr; 1162 id_t id; 1163 int error = 0; 1164 1165 ASSERT_RACCT_ENABLED(); 1166 1167 rule = rctl_rule_alloc(M_WAITOK); 1168 1169 subjectstr = strsep(&rulestr, ":"); 1170 subject_idstr = strsep(&rulestr, ":"); 1171 resourcestr = strsep(&rulestr, ":"); 1172 actionstr = strsep(&rulestr, "=/"); 1173 amountstr = strsep(&rulestr, "/"); 1174 perstr = rulestr; 1175 1176 if (subjectstr == NULL || subjectstr[0] == '\0') 1177 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1178 else { 1179 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1180 if (error != 0) 1181 goto out; 1182 } 1183 1184 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1185 rule->rr_subject.rs_proc = NULL; 1186 rule->rr_subject.rs_uip = NULL; 1187 rule->rr_subject.rs_loginclass = NULL; 1188 rule->rr_subject.rs_prison_racct = NULL; 1189 } else { 1190 switch (rule->rr_subject_type) { 1191 case RCTL_SUBJECT_TYPE_UNDEFINED: 1192 error = EINVAL; 1193 goto out; 1194 case RCTL_SUBJECT_TYPE_PROCESS: 1195 error = str2id(subject_idstr, &id); 1196 if (error != 0) 1197 goto out; 1198 sx_assert(&allproc_lock, SA_LOCKED); 1199 rule->rr_subject.rs_proc = pfind(id); 1200 if (rule->rr_subject.rs_proc == NULL) { 1201 error = ESRCH; 1202 goto out; 1203 } 1204 PROC_UNLOCK(rule->rr_subject.rs_proc); 1205 break; 1206 case RCTL_SUBJECT_TYPE_USER: 1207 error = str2id(subject_idstr, &id); 1208 if (error != 0) 1209 goto out; 1210 rule->rr_subject.rs_uip = uifind(id); 1211 break; 1212 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1213 rule->rr_subject.rs_loginclass = 1214 loginclass_find(subject_idstr); 1215 if (rule->rr_subject.rs_loginclass == NULL) { 1216 error = ENAMETOOLONG; 1217 goto out; 1218 } 1219 break; 1220 case RCTL_SUBJECT_TYPE_JAIL: 1221 rule->rr_subject.rs_prison_racct = 1222 prison_racct_find(subject_idstr); 1223 if (rule->rr_subject.rs_prison_racct == NULL) { 1224 error = ENAMETOOLONG; 1225 goto out; 1226 } 1227 break; 1228 default: 1229 panic("rctl_string_to_rule: unknown subject type %d", 1230 rule->rr_subject_type); 1231 } 1232 } 1233 1234 if (resourcestr == NULL || resourcestr[0] == '\0') 1235 rule->rr_resource = RACCT_UNDEFINED; 1236 else { 1237 error = str2value(resourcestr, &rule->rr_resource, 1238 resourcenames); 1239 if (error != 0) 1240 goto out; 1241 } 1242 1243 if (actionstr == NULL || actionstr[0] == '\0') 1244 rule->rr_action = RCTL_ACTION_UNDEFINED; 1245 else { 1246 error = str2value(actionstr, &rule->rr_action, actionnames); 1247 if (error != 0) 1248 goto out; 1249 } 1250 1251 if (amountstr == NULL || amountstr[0] == '\0') 1252 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1253 else { 1254 error = str2int64(amountstr, &rule->rr_amount); 1255 if (error != 0) 1256 goto out; 1257 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1258 if (rule->rr_amount > INT64_MAX / 1000000) { 1259 error = ERANGE; 1260 goto out; 1261 } 1262 rule->rr_amount *= 1000000; 1263 } 1264 } 1265 1266 if (perstr == NULL || perstr[0] == '\0') 1267 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1268 else { 1269 error = str2value(perstr, &rule->rr_per, subjectnames); 1270 if (error != 0) 1271 goto out; 1272 } 1273 1274 out: 1275 if (error == 0) 1276 *rulep = rule; 1277 else 1278 rctl_rule_release(rule); 1279 1280 return (error); 1281 } 1282 1283 /* 1284 * Link a rule with all the subjects it applies to. 1285 */ 1286 int 1287 rctl_rule_add(struct rctl_rule *rule) 1288 { 1289 struct proc *p; 1290 struct ucred *cred; 1291 struct uidinfo *uip; 1292 struct prison *pr; 1293 struct prison_racct *prr; 1294 struct loginclass *lc; 1295 struct rctl_rule *rule2; 1296 int match; 1297 1298 ASSERT_RACCT_ENABLED(); 1299 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1300 1301 /* 1302 * Some rules just don't make sense, like "deny" rule for an undeniable 1303 * resource. The exception are the RSS and %CPU resources - they are 1304 * not deniable in the racct sense, but the limit is enforced in 1305 * a different way. 1306 */ 1307 if (rule->rr_action == RCTL_ACTION_DENY && 1308 !RACCT_IS_DENIABLE(rule->rr_resource) && 1309 rule->rr_resource != RACCT_RSS && 1310 rule->rr_resource != RACCT_PCTCPU) { 1311 return (EOPNOTSUPP); 1312 } 1313 1314 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1315 !RACCT_IS_DECAYING(rule->rr_resource)) { 1316 return (EOPNOTSUPP); 1317 } 1318 1319 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1320 rule->rr_resource == RACCT_PCTCPU) { 1321 return (EOPNOTSUPP); 1322 } 1323 1324 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1325 RACCT_IS_SLOPPY(rule->rr_resource)) { 1326 return (EOPNOTSUPP); 1327 } 1328 1329 /* 1330 * Make sure there are no duplicated rules. Also, for the "deny" 1331 * rules, remove ones differing only by "amount". 1332 */ 1333 if (rule->rr_action == RCTL_ACTION_DENY) { 1334 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1335 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1336 rctl_rule_remove(rule2); 1337 rctl_rule_release(rule2); 1338 } else 1339 rctl_rule_remove(rule); 1340 1341 switch (rule->rr_subject_type) { 1342 case RCTL_SUBJECT_TYPE_PROCESS: 1343 p = rule->rr_subject.rs_proc; 1344 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1345 1346 rctl_racct_add_rule(p->p_racct, rule); 1347 /* 1348 * In case of per-process rule, we don't have anything more 1349 * to do. 1350 */ 1351 return (0); 1352 1353 case RCTL_SUBJECT_TYPE_USER: 1354 uip = rule->rr_subject.rs_uip; 1355 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1356 rctl_racct_add_rule(uip->ui_racct, rule); 1357 break; 1358 1359 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1360 lc = rule->rr_subject.rs_loginclass; 1361 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1362 rctl_racct_add_rule(lc->lc_racct, rule); 1363 break; 1364 1365 case RCTL_SUBJECT_TYPE_JAIL: 1366 prr = rule->rr_subject.rs_prison_racct; 1367 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1368 rctl_racct_add_rule(prr->prr_racct, rule); 1369 break; 1370 1371 default: 1372 panic("rctl_rule_add: unknown subject type %d", 1373 rule->rr_subject_type); 1374 } 1375 1376 /* 1377 * Now go through all the processes and add the new rule to the ones 1378 * it applies to. 1379 */ 1380 sx_assert(&allproc_lock, SA_LOCKED); 1381 FOREACH_PROC_IN_SYSTEM(p) { 1382 cred = p->p_ucred; 1383 switch (rule->rr_subject_type) { 1384 case RCTL_SUBJECT_TYPE_USER: 1385 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1386 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1387 break; 1388 continue; 1389 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1390 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1391 break; 1392 continue; 1393 case RCTL_SUBJECT_TYPE_JAIL: 1394 match = 0; 1395 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1396 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1397 match = 1; 1398 break; 1399 } 1400 } 1401 if (match) 1402 break; 1403 continue; 1404 default: 1405 panic("rctl_rule_add: unknown subject type %d", 1406 rule->rr_subject_type); 1407 } 1408 1409 rctl_racct_add_rule(p->p_racct, rule); 1410 } 1411 1412 return (0); 1413 } 1414 1415 static void 1416 rctl_rule_pre_callback(void) 1417 { 1418 1419 RCTL_WLOCK(); 1420 } 1421 1422 static void 1423 rctl_rule_post_callback(void) 1424 { 1425 1426 RCTL_WUNLOCK(); 1427 } 1428 1429 static void 1430 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) 1431 { 1432 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1433 int found = 0; 1434 1435 ASSERT_RACCT_ENABLED(); 1436 RCTL_WLOCK_ASSERT(); 1437 1438 found += rctl_racct_remove_rules(racct, filter); 1439 1440 *((int *)arg3) += found; 1441 } 1442 1443 /* 1444 * Remove all rules that match the filter. 1445 */ 1446 int 1447 rctl_rule_remove(struct rctl_rule *filter) 1448 { 1449 struct proc *p; 1450 int found = 0; 1451 1452 ASSERT_RACCT_ENABLED(); 1453 1454 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1455 filter->rr_subject.rs_proc != NULL) { 1456 p = filter->rr_subject.rs_proc; 1457 RCTL_WLOCK(); 1458 found = rctl_racct_remove_rules(p->p_racct, filter); 1459 RCTL_WUNLOCK(); 1460 if (found) 1461 return (0); 1462 return (ESRCH); 1463 } 1464 1465 loginclass_racct_foreach(rctl_rule_remove_callback, 1466 rctl_rule_pre_callback, rctl_rule_post_callback, 1467 filter, (void *)&found); 1468 ui_racct_foreach(rctl_rule_remove_callback, 1469 rctl_rule_pre_callback, rctl_rule_post_callback, 1470 filter, (void *)&found); 1471 prison_racct_foreach(rctl_rule_remove_callback, 1472 rctl_rule_pre_callback, rctl_rule_post_callback, 1473 filter, (void *)&found); 1474 1475 sx_assert(&allproc_lock, SA_LOCKED); 1476 RCTL_WLOCK(); 1477 FOREACH_PROC_IN_SYSTEM(p) { 1478 found += rctl_racct_remove_rules(p->p_racct, filter); 1479 } 1480 RCTL_WUNLOCK(); 1481 1482 if (found) 1483 return (0); 1484 return (ESRCH); 1485 } 1486 1487 /* 1488 * Appends a rule to the sbuf. 1489 */ 1490 static void 1491 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1492 { 1493 int64_t amount; 1494 1495 ASSERT_RACCT_ENABLED(); 1496 1497 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1498 1499 switch (rule->rr_subject_type) { 1500 case RCTL_SUBJECT_TYPE_PROCESS: 1501 if (rule->rr_subject.rs_proc == NULL) 1502 sbuf_printf(sb, ":"); 1503 else 1504 sbuf_printf(sb, "%d:", 1505 rule->rr_subject.rs_proc->p_pid); 1506 break; 1507 case RCTL_SUBJECT_TYPE_USER: 1508 if (rule->rr_subject.rs_uip == NULL) 1509 sbuf_printf(sb, ":"); 1510 else 1511 sbuf_printf(sb, "%d:", 1512 rule->rr_subject.rs_uip->ui_uid); 1513 break; 1514 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1515 if (rule->rr_subject.rs_loginclass == NULL) 1516 sbuf_printf(sb, ":"); 1517 else 1518 sbuf_printf(sb, "%s:", 1519 rule->rr_subject.rs_loginclass->lc_name); 1520 break; 1521 case RCTL_SUBJECT_TYPE_JAIL: 1522 if (rule->rr_subject.rs_prison_racct == NULL) 1523 sbuf_printf(sb, ":"); 1524 else 1525 sbuf_printf(sb, "%s:", 1526 rule->rr_subject.rs_prison_racct->prr_name); 1527 break; 1528 default: 1529 panic("rctl_rule_to_sbuf: unknown subject type %d", 1530 rule->rr_subject_type); 1531 } 1532 1533 amount = rule->rr_amount; 1534 if (amount != RCTL_AMOUNT_UNDEFINED && 1535 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1536 amount /= 1000000; 1537 1538 sbuf_printf(sb, "%s:%s=%jd", 1539 rctl_resource_name(rule->rr_resource), 1540 rctl_action_name(rule->rr_action), 1541 amount); 1542 1543 if (rule->rr_per != rule->rr_subject_type) 1544 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1545 } 1546 1547 /* 1548 * Routine used by RCTL syscalls to read in input string. 1549 */ 1550 static int 1551 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1552 { 1553 char *str; 1554 int error; 1555 1556 ASSERT_RACCT_ENABLED(); 1557 1558 if (inbuflen <= 0) 1559 return (EINVAL); 1560 if (inbuflen > RCTL_MAX_INBUFSIZE) 1561 return (E2BIG); 1562 1563 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1564 error = copyinstr(inbufp, str, inbuflen, NULL); 1565 if (error != 0) { 1566 free(str, M_RCTL); 1567 return (error); 1568 } 1569 1570 *inputstr = str; 1571 1572 return (0); 1573 } 1574 1575 /* 1576 * Routine used by RCTL syscalls to write out output string. 1577 */ 1578 static int 1579 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1580 { 1581 int error; 1582 1583 ASSERT_RACCT_ENABLED(); 1584 1585 if (outputsbuf == NULL) 1586 return (0); 1587 1588 sbuf_finish(outputsbuf); 1589 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1590 sbuf_delete(outputsbuf); 1591 return (ERANGE); 1592 } 1593 error = copyout(sbuf_data(outputsbuf), outbufp, 1594 sbuf_len(outputsbuf) + 1); 1595 sbuf_delete(outputsbuf); 1596 return (error); 1597 } 1598 1599 static struct sbuf * 1600 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1601 { 1602 struct sbuf *sb; 1603 int64_t amount; 1604 int i; 1605 1606 ASSERT_RACCT_ENABLED(); 1607 1608 sb = sbuf_new_auto(); 1609 for (i = 0; i <= RACCT_MAX; i++) { 1610 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1611 continue; 1612 amount = racct->r_resources[i]; 1613 if (RACCT_IS_IN_MILLIONS(i)) 1614 amount /= 1000000; 1615 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1616 } 1617 sbuf_setpos(sb, sbuf_len(sb) - 1); 1618 return (sb); 1619 } 1620 1621 int 1622 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1623 { 1624 struct rctl_rule *filter; 1625 struct sbuf *outputsbuf = NULL; 1626 struct proc *p; 1627 struct uidinfo *uip; 1628 struct loginclass *lc; 1629 struct prison_racct *prr; 1630 char *inputstr; 1631 int error; 1632 1633 if (!racct_enable) 1634 return (ENOSYS); 1635 1636 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1637 if (error != 0) 1638 return (error); 1639 1640 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1641 if (error != 0) 1642 return (error); 1643 1644 sx_slock(&allproc_lock); 1645 error = rctl_string_to_rule(inputstr, &filter); 1646 free(inputstr, M_RCTL); 1647 if (error != 0) { 1648 sx_sunlock(&allproc_lock); 1649 return (error); 1650 } 1651 1652 switch (filter->rr_subject_type) { 1653 case RCTL_SUBJECT_TYPE_PROCESS: 1654 p = filter->rr_subject.rs_proc; 1655 if (p == NULL) { 1656 error = EINVAL; 1657 goto out; 1658 } 1659 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1660 break; 1661 case RCTL_SUBJECT_TYPE_USER: 1662 uip = filter->rr_subject.rs_uip; 1663 if (uip == NULL) { 1664 error = EINVAL; 1665 goto out; 1666 } 1667 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1668 break; 1669 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1670 lc = filter->rr_subject.rs_loginclass; 1671 if (lc == NULL) { 1672 error = EINVAL; 1673 goto out; 1674 } 1675 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1676 break; 1677 case RCTL_SUBJECT_TYPE_JAIL: 1678 prr = filter->rr_subject.rs_prison_racct; 1679 if (prr == NULL) { 1680 error = EINVAL; 1681 goto out; 1682 } 1683 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1684 break; 1685 default: 1686 error = EINVAL; 1687 } 1688 out: 1689 rctl_rule_release(filter); 1690 sx_sunlock(&allproc_lock); 1691 if (error != 0) 1692 return (error); 1693 1694 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1695 1696 return (error); 1697 } 1698 1699 static void 1700 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1701 { 1702 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1703 struct rctl_rule_link *link; 1704 struct sbuf *sb = (struct sbuf *)arg3; 1705 1706 ASSERT_RACCT_ENABLED(); 1707 RCTL_LOCK_ASSERT(); 1708 1709 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1710 if (!rctl_rule_matches(link->rrl_rule, filter)) 1711 continue; 1712 rctl_rule_to_sbuf(sb, link->rrl_rule); 1713 sbuf_printf(sb, ","); 1714 } 1715 } 1716 1717 int 1718 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1719 { 1720 struct sbuf *sb; 1721 struct rctl_rule *filter; 1722 struct rctl_rule_link *link; 1723 struct proc *p; 1724 char *inputstr, *buf; 1725 size_t bufsize; 1726 int error; 1727 1728 if (!racct_enable) 1729 return (ENOSYS); 1730 1731 error = priv_check(td, PRIV_RCTL_GET_RULES); 1732 if (error != 0) 1733 return (error); 1734 1735 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1736 if (error != 0) 1737 return (error); 1738 1739 sx_slock(&allproc_lock); 1740 error = rctl_string_to_rule(inputstr, &filter); 1741 free(inputstr, M_RCTL); 1742 if (error != 0) { 1743 sx_sunlock(&allproc_lock); 1744 return (error); 1745 } 1746 1747 bufsize = uap->outbuflen; 1748 if (bufsize > rctl_maxbufsize) { 1749 sx_sunlock(&allproc_lock); 1750 return (E2BIG); 1751 } 1752 1753 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1754 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1755 KASSERT(sb != NULL, ("sbuf_new failed")); 1756 1757 FOREACH_PROC_IN_SYSTEM(p) { 1758 RCTL_RLOCK(); 1759 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1760 /* 1761 * Non-process rules will be added to the buffer later. 1762 * Adding them here would result in duplicated output. 1763 */ 1764 if (link->rrl_rule->rr_subject_type != 1765 RCTL_SUBJECT_TYPE_PROCESS) 1766 continue; 1767 if (!rctl_rule_matches(link->rrl_rule, filter)) 1768 continue; 1769 rctl_rule_to_sbuf(sb, link->rrl_rule); 1770 sbuf_printf(sb, ","); 1771 } 1772 RCTL_RUNLOCK(); 1773 } 1774 1775 loginclass_racct_foreach(rctl_get_rules_callback, 1776 rctl_rule_pre_callback, rctl_rule_post_callback, 1777 filter, sb); 1778 ui_racct_foreach(rctl_get_rules_callback, 1779 rctl_rule_pre_callback, rctl_rule_post_callback, 1780 filter, sb); 1781 prison_racct_foreach(rctl_get_rules_callback, 1782 rctl_rule_pre_callback, rctl_rule_post_callback, 1783 filter, sb); 1784 if (sbuf_error(sb) == ENOMEM) { 1785 error = ERANGE; 1786 goto out; 1787 } 1788 1789 /* 1790 * Remove trailing ",". 1791 */ 1792 if (sbuf_len(sb) > 0) 1793 sbuf_setpos(sb, sbuf_len(sb) - 1); 1794 1795 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1796 out: 1797 rctl_rule_release(filter); 1798 sx_sunlock(&allproc_lock); 1799 free(buf, M_RCTL); 1800 return (error); 1801 } 1802 1803 int 1804 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1805 { 1806 struct sbuf *sb; 1807 struct rctl_rule *filter; 1808 struct rctl_rule_link *link; 1809 char *inputstr, *buf; 1810 size_t bufsize; 1811 int error; 1812 1813 if (!racct_enable) 1814 return (ENOSYS); 1815 1816 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1817 if (error != 0) 1818 return (error); 1819 1820 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1821 if (error != 0) 1822 return (error); 1823 1824 sx_slock(&allproc_lock); 1825 error = rctl_string_to_rule(inputstr, &filter); 1826 free(inputstr, M_RCTL); 1827 if (error != 0) { 1828 sx_sunlock(&allproc_lock); 1829 return (error); 1830 } 1831 1832 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1833 rctl_rule_release(filter); 1834 sx_sunlock(&allproc_lock); 1835 return (EINVAL); 1836 } 1837 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1838 rctl_rule_release(filter); 1839 sx_sunlock(&allproc_lock); 1840 return (EOPNOTSUPP); 1841 } 1842 if (filter->rr_subject.rs_proc == NULL) { 1843 rctl_rule_release(filter); 1844 sx_sunlock(&allproc_lock); 1845 return (EINVAL); 1846 } 1847 1848 bufsize = uap->outbuflen; 1849 if (bufsize > rctl_maxbufsize) { 1850 rctl_rule_release(filter); 1851 sx_sunlock(&allproc_lock); 1852 return (E2BIG); 1853 } 1854 1855 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1856 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1857 KASSERT(sb != NULL, ("sbuf_new failed")); 1858 1859 RCTL_RLOCK(); 1860 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1861 rrl_next) { 1862 rctl_rule_to_sbuf(sb, link->rrl_rule); 1863 sbuf_printf(sb, ","); 1864 } 1865 RCTL_RUNLOCK(); 1866 if (sbuf_error(sb) == ENOMEM) { 1867 error = ERANGE; 1868 goto out; 1869 } 1870 1871 /* 1872 * Remove trailing ",". 1873 */ 1874 if (sbuf_len(sb) > 0) 1875 sbuf_setpos(sb, sbuf_len(sb) - 1); 1876 1877 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1878 out: 1879 rctl_rule_release(filter); 1880 sx_sunlock(&allproc_lock); 1881 free(buf, M_RCTL); 1882 return (error); 1883 } 1884 1885 int 1886 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1887 { 1888 struct rctl_rule *rule; 1889 char *inputstr; 1890 int error; 1891 1892 if (!racct_enable) 1893 return (ENOSYS); 1894 1895 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1896 if (error != 0) 1897 return (error); 1898 1899 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1900 if (error != 0) 1901 return (error); 1902 1903 sx_slock(&allproc_lock); 1904 error = rctl_string_to_rule(inputstr, &rule); 1905 free(inputstr, M_RCTL); 1906 if (error != 0) { 1907 sx_sunlock(&allproc_lock); 1908 return (error); 1909 } 1910 /* 1911 * The 'per' part of a rule is optional. 1912 */ 1913 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1914 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1915 rule->rr_per = rule->rr_subject_type; 1916 1917 if (!rctl_rule_fully_specified(rule)) { 1918 error = EINVAL; 1919 goto out; 1920 } 1921 1922 error = rctl_rule_add(rule); 1923 1924 out: 1925 rctl_rule_release(rule); 1926 sx_sunlock(&allproc_lock); 1927 return (error); 1928 } 1929 1930 int 1931 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1932 { 1933 struct rctl_rule *filter; 1934 char *inputstr; 1935 int error; 1936 1937 if (!racct_enable) 1938 return (ENOSYS); 1939 1940 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1941 if (error != 0) 1942 return (error); 1943 1944 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1945 if (error != 0) 1946 return (error); 1947 1948 sx_slock(&allproc_lock); 1949 error = rctl_string_to_rule(inputstr, &filter); 1950 free(inputstr, M_RCTL); 1951 if (error != 0) { 1952 sx_sunlock(&allproc_lock); 1953 return (error); 1954 } 1955 1956 error = rctl_rule_remove(filter); 1957 rctl_rule_release(filter); 1958 sx_sunlock(&allproc_lock); 1959 1960 return (error); 1961 } 1962 1963 /* 1964 * Update RCTL rule list after credential change. 1965 */ 1966 void 1967 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1968 { 1969 LIST_HEAD(, rctl_rule_link) newrules; 1970 struct rctl_rule_link *link, *newlink; 1971 struct uidinfo *newuip; 1972 struct loginclass *newlc; 1973 struct prison_racct *newprr; 1974 int rulecnt, i; 1975 1976 ASSERT_RACCT_ENABLED(); 1977 1978 newuip = newcred->cr_ruidinfo; 1979 newlc = newcred->cr_loginclass; 1980 newprr = newcred->cr_prison->pr_prison_racct; 1981 1982 LIST_INIT(&newrules); 1983 1984 again: 1985 /* 1986 * First, count the rules that apply to the process with new 1987 * credentials. 1988 */ 1989 rulecnt = 0; 1990 RCTL_RLOCK(); 1991 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1992 if (link->rrl_rule->rr_subject_type == 1993 RCTL_SUBJECT_TYPE_PROCESS) 1994 rulecnt++; 1995 } 1996 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 1997 rulecnt++; 1998 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 1999 rulecnt++; 2000 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 2001 rulecnt++; 2002 RCTL_RUNLOCK(); 2003 2004 /* 2005 * Create temporary list. We've dropped the rctl_lock in order 2006 * to use M_WAITOK. 2007 */ 2008 for (i = 0; i < rulecnt; i++) { 2009 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2010 newlink->rrl_rule = NULL; 2011 newlink->rrl_exceeded = 0; 2012 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2013 } 2014 2015 newlink = LIST_FIRST(&newrules); 2016 2017 /* 2018 * Assign rules to the newly allocated list entries. 2019 */ 2020 RCTL_WLOCK(); 2021 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2022 if (link->rrl_rule->rr_subject_type == 2023 RCTL_SUBJECT_TYPE_PROCESS) { 2024 if (newlink == NULL) 2025 goto goaround; 2026 rctl_rule_acquire(link->rrl_rule); 2027 newlink->rrl_rule = link->rrl_rule; 2028 newlink->rrl_exceeded = link->rrl_exceeded; 2029 newlink = LIST_NEXT(newlink, rrl_next); 2030 rulecnt--; 2031 } 2032 } 2033 2034 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2035 if (newlink == NULL) 2036 goto goaround; 2037 rctl_rule_acquire(link->rrl_rule); 2038 newlink->rrl_rule = link->rrl_rule; 2039 newlink->rrl_exceeded = link->rrl_exceeded; 2040 newlink = LIST_NEXT(newlink, rrl_next); 2041 rulecnt--; 2042 } 2043 2044 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2045 if (newlink == NULL) 2046 goto goaround; 2047 rctl_rule_acquire(link->rrl_rule); 2048 newlink->rrl_rule = link->rrl_rule; 2049 newlink->rrl_exceeded = link->rrl_exceeded; 2050 newlink = LIST_NEXT(newlink, rrl_next); 2051 rulecnt--; 2052 } 2053 2054 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2055 if (newlink == NULL) 2056 goto goaround; 2057 rctl_rule_acquire(link->rrl_rule); 2058 newlink->rrl_rule = link->rrl_rule; 2059 newlink->rrl_exceeded = link->rrl_exceeded; 2060 newlink = LIST_NEXT(newlink, rrl_next); 2061 rulecnt--; 2062 } 2063 2064 if (rulecnt == 0) { 2065 /* 2066 * Free the old rule list. 2067 */ 2068 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2069 link = LIST_FIRST(&p->p_racct->r_rule_links); 2070 LIST_REMOVE(link, rrl_next); 2071 rctl_rule_release(link->rrl_rule); 2072 uma_zfree(rctl_rule_link_zone, link); 2073 } 2074 2075 /* 2076 * Replace lists and we're done. 2077 * 2078 * XXX: Is there any way to switch list heads instead 2079 * of iterating here? 2080 */ 2081 while (!LIST_EMPTY(&newrules)) { 2082 newlink = LIST_FIRST(&newrules); 2083 LIST_REMOVE(newlink, rrl_next); 2084 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2085 newlink, rrl_next); 2086 } 2087 2088 RCTL_WUNLOCK(); 2089 2090 return; 2091 } 2092 2093 goaround: 2094 RCTL_WUNLOCK(); 2095 2096 /* 2097 * Rule list changed while we were not holding the rctl_lock. 2098 * Free the new list and try again. 2099 */ 2100 while (!LIST_EMPTY(&newrules)) { 2101 newlink = LIST_FIRST(&newrules); 2102 LIST_REMOVE(newlink, rrl_next); 2103 if (newlink->rrl_rule != NULL) 2104 rctl_rule_release(newlink->rrl_rule); 2105 uma_zfree(rctl_rule_link_zone, newlink); 2106 } 2107 2108 goto again; 2109 } 2110 2111 /* 2112 * Assign RCTL rules to the newly created process. 2113 */ 2114 int 2115 rctl_proc_fork(struct proc *parent, struct proc *child) 2116 { 2117 struct rctl_rule *rule; 2118 struct rctl_rule_link *link; 2119 int error; 2120 2121 LIST_INIT(&child->p_racct->r_rule_links); 2122 2123 ASSERT_RACCT_ENABLED(); 2124 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2125 2126 RCTL_WLOCK(); 2127 2128 /* 2129 * Go through limits applicable to the parent and assign them 2130 * to the child. Rules with 'process' subject have to be duplicated 2131 * in order to make their rr_subject point to the new process. 2132 */ 2133 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2134 if (link->rrl_rule->rr_subject_type == 2135 RCTL_SUBJECT_TYPE_PROCESS) { 2136 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2137 if (rule == NULL) 2138 goto fail; 2139 KASSERT(rule->rr_subject.rs_proc == parent, 2140 ("rule->rr_subject.rs_proc != parent")); 2141 rule->rr_subject.rs_proc = child; 2142 error = rctl_racct_add_rule_locked(child->p_racct, 2143 rule); 2144 rctl_rule_release(rule); 2145 if (error != 0) 2146 goto fail; 2147 } else { 2148 error = rctl_racct_add_rule_locked(child->p_racct, 2149 link->rrl_rule); 2150 if (error != 0) 2151 goto fail; 2152 } 2153 } 2154 2155 RCTL_WUNLOCK(); 2156 return (0); 2157 2158 fail: 2159 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2160 link = LIST_FIRST(&child->p_racct->r_rule_links); 2161 LIST_REMOVE(link, rrl_next); 2162 rctl_rule_release(link->rrl_rule); 2163 uma_zfree(rctl_rule_link_zone, link); 2164 } 2165 RCTL_WUNLOCK(); 2166 return (EAGAIN); 2167 } 2168 2169 /* 2170 * Release rules attached to the racct. 2171 */ 2172 void 2173 rctl_racct_release(struct racct *racct) 2174 { 2175 struct rctl_rule_link *link; 2176 2177 ASSERT_RACCT_ENABLED(); 2178 2179 RCTL_WLOCK(); 2180 while (!LIST_EMPTY(&racct->r_rule_links)) { 2181 link = LIST_FIRST(&racct->r_rule_links); 2182 LIST_REMOVE(link, rrl_next); 2183 rctl_rule_release(link->rrl_rule); 2184 uma_zfree(rctl_rule_link_zone, link); 2185 } 2186 RCTL_WUNLOCK(); 2187 } 2188 2189 static void 2190 rctl_init(void) 2191 { 2192 2193 if (!racct_enable) 2194 return; 2195 2196 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2197 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2198 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2199 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2200 UMA_ALIGN_PTR, 0); 2201 2202 /* 2203 * Set default values, making sure not to overwrite the ones 2204 * fetched from tunables. Most of those could be set at the 2205 * declaration, except for the rctl_throttle_max - we cannot 2206 * set it there due to hz not being compile time constant. 2207 */ 2208 if (rctl_throttle_min < 1) 2209 rctl_throttle_min = 1; 2210 if (rctl_throttle_max < rctl_throttle_min) 2211 rctl_throttle_max = 2 * hz; 2212 if (rctl_throttle_pct < 0) 2213 rctl_throttle_pct = 100; 2214 if (rctl_throttle_pct2 < 0) 2215 rctl_throttle_pct2 = 100; 2216 } 2217 2218 #else /* !RCTL */ 2219 2220 int 2221 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2222 { 2223 2224 return (ENOSYS); 2225 } 2226 2227 int 2228 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2229 { 2230 2231 return (ENOSYS); 2232 } 2233 2234 int 2235 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2236 { 2237 2238 return (ENOSYS); 2239 } 2240 2241 int 2242 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2243 { 2244 2245 return (ENOSYS); 2246 } 2247 2248 int 2249 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2250 { 2251 2252 return (ENOSYS); 2253 } 2254 2255 #endif /* !RCTL */ 2256