1 /*- 2 * Copyright (c) 2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Edward Tomasz Napierala under sponsorship 6 * from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/malloc.h> 38 #include <sys/queue.h> 39 #include <sys/refcount.h> 40 #include <sys/jail.h> 41 #include <sys/kernel.h> 42 #include <sys/limits.h> 43 #include <sys/loginclass.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/racct.h> 47 #include <sys/rctl.h> 48 #include <sys/resourcevar.h> 49 #include <sys/sx.h> 50 #include <sys/sysent.h> 51 #include <sys/sysproto.h> 52 #include <sys/systm.h> 53 #include <sys/types.h> 54 #include <sys/eventhandler.h> 55 #include <sys/lock.h> 56 #include <sys/mutex.h> 57 #include <sys/rwlock.h> 58 #include <sys/sbuf.h> 59 #include <sys/taskqueue.h> 60 #include <sys/tree.h> 61 #include <vm/uma.h> 62 63 #ifdef RCTL 64 #ifndef RACCT 65 #error "The RCTL option requires the RACCT option" 66 #endif 67 68 FEATURE(rctl, "Resource Limits"); 69 70 #define HRF_DEFAULT 0 71 #define HRF_DONT_INHERIT 1 72 #define HRF_DONT_ACCUMULATE 2 73 74 #define RCTL_MAX_INBUFSIZE 4 * 1024 75 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 76 #define RCTL_LOG_BUFSIZE 128 77 78 #define RCTL_PCPU_SHIFT (10 * 1000000) 79 80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; 81 static int rctl_log_rate_limit = 10; 82 static int rctl_devctl_rate_limit = 10; 83 84 /* 85 * Values below are initialized in rctl_init(). 86 */ 87 static int rctl_throttle_min = -1; 88 static int rctl_throttle_max = -1; 89 static int rctl_throttle_pct = -1; 90 static int rctl_throttle_pct2 = -1; 91 92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS); 93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS); 94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS); 95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS); 96 97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits"); 98 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, 99 &rctl_maxbufsize, 0, "Maximum output buffer size"); 100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW, 101 &rctl_log_rate_limit, 0, "Maximum number of log messages per second"); 102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN, 103 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second"); 104 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min, 105 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU", 106 "Shortest throttling duration, in hz"); 107 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min); 108 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max, 109 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU", 110 "Longest throttling duration, in hz"); 111 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max); 112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct, 113 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU", 114 "Throttling penalty for process consumption, in percent"); 115 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct); 116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2, 117 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU", 118 "Throttling penalty for container consumption, in percent"); 119 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2); 120 121 /* 122 * 'rctl_rule_link' connects a rule with every racct it's related to. 123 * For example, rule 'user:X:openfiles:deny=N/process' is linked 124 * with uidinfo for user X, and to each process of that user. 125 */ 126 struct rctl_rule_link { 127 LIST_ENTRY(rctl_rule_link) rrl_next; 128 struct rctl_rule *rrl_rule; 129 int rrl_exceeded; 130 }; 131 132 struct dict { 133 const char *d_name; 134 int d_value; 135 }; 136 137 static struct dict subjectnames[] = { 138 { "process", RCTL_SUBJECT_TYPE_PROCESS }, 139 { "user", RCTL_SUBJECT_TYPE_USER }, 140 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, 141 { "jail", RCTL_SUBJECT_TYPE_JAIL }, 142 { NULL, -1 }}; 143 144 static struct dict resourcenames[] = { 145 { "cputime", RACCT_CPU }, 146 { "datasize", RACCT_DATA }, 147 { "stacksize", RACCT_STACK }, 148 { "coredumpsize", RACCT_CORE }, 149 { "memoryuse", RACCT_RSS }, 150 { "memorylocked", RACCT_MEMLOCK }, 151 { "maxproc", RACCT_NPROC }, 152 { "openfiles", RACCT_NOFILE }, 153 { "vmemoryuse", RACCT_VMEM }, 154 { "pseudoterminals", RACCT_NPTS }, 155 { "swapuse", RACCT_SWAP }, 156 { "nthr", RACCT_NTHR }, 157 { "msgqqueued", RACCT_MSGQQUEUED }, 158 { "msgqsize", RACCT_MSGQSIZE }, 159 { "nmsgq", RACCT_NMSGQ }, 160 { "nsem", RACCT_NSEM }, 161 { "nsemop", RACCT_NSEMOP }, 162 { "nshm", RACCT_NSHM }, 163 { "shmsize", RACCT_SHMSIZE }, 164 { "wallclock", RACCT_WALLCLOCK }, 165 { "pcpu", RACCT_PCTCPU }, 166 { "readbps", RACCT_READBPS }, 167 { "writebps", RACCT_WRITEBPS }, 168 { "readiops", RACCT_READIOPS }, 169 { "writeiops", RACCT_WRITEIOPS }, 170 { NULL, -1 }}; 171 172 static struct dict actionnames[] = { 173 { "sighup", RCTL_ACTION_SIGHUP }, 174 { "sigint", RCTL_ACTION_SIGINT }, 175 { "sigquit", RCTL_ACTION_SIGQUIT }, 176 { "sigill", RCTL_ACTION_SIGILL }, 177 { "sigtrap", RCTL_ACTION_SIGTRAP }, 178 { "sigabrt", RCTL_ACTION_SIGABRT }, 179 { "sigemt", RCTL_ACTION_SIGEMT }, 180 { "sigfpe", RCTL_ACTION_SIGFPE }, 181 { "sigkill", RCTL_ACTION_SIGKILL }, 182 { "sigbus", RCTL_ACTION_SIGBUS }, 183 { "sigsegv", RCTL_ACTION_SIGSEGV }, 184 { "sigsys", RCTL_ACTION_SIGSYS }, 185 { "sigpipe", RCTL_ACTION_SIGPIPE }, 186 { "sigalrm", RCTL_ACTION_SIGALRM }, 187 { "sigterm", RCTL_ACTION_SIGTERM }, 188 { "sigurg", RCTL_ACTION_SIGURG }, 189 { "sigstop", RCTL_ACTION_SIGSTOP }, 190 { "sigtstp", RCTL_ACTION_SIGTSTP }, 191 { "sigchld", RCTL_ACTION_SIGCHLD }, 192 { "sigttin", RCTL_ACTION_SIGTTIN }, 193 { "sigttou", RCTL_ACTION_SIGTTOU }, 194 { "sigio", RCTL_ACTION_SIGIO }, 195 { "sigxcpu", RCTL_ACTION_SIGXCPU }, 196 { "sigxfsz", RCTL_ACTION_SIGXFSZ }, 197 { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, 198 { "sigprof", RCTL_ACTION_SIGPROF }, 199 { "sigwinch", RCTL_ACTION_SIGWINCH }, 200 { "siginfo", RCTL_ACTION_SIGINFO }, 201 { "sigusr1", RCTL_ACTION_SIGUSR1 }, 202 { "sigusr2", RCTL_ACTION_SIGUSR2 }, 203 { "sigthr", RCTL_ACTION_SIGTHR }, 204 { "deny", RCTL_ACTION_DENY }, 205 { "log", RCTL_ACTION_LOG }, 206 { "devctl", RCTL_ACTION_DEVCTL }, 207 { "throttle", RCTL_ACTION_THROTTLE }, 208 { NULL, -1 }}; 209 210 static void rctl_init(void); 211 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); 212 213 static uma_zone_t rctl_rule_link_zone; 214 static uma_zone_t rctl_rule_zone; 215 static struct rwlock rctl_lock; 216 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock"); 217 218 #define RCTL_RLOCK() rw_rlock(&rctl_lock) 219 #define RCTL_RUNLOCK() rw_runlock(&rctl_lock) 220 #define RCTL_WLOCK() rw_wlock(&rctl_lock) 221 #define RCTL_WUNLOCK() rw_wunlock(&rctl_lock) 222 #define RCTL_LOCK_ASSERT() rw_assert(&rctl_lock, RA_LOCKED) 223 #define RCTL_WLOCK_ASSERT() rw_assert(&rctl_lock, RA_WLOCKED) 224 225 static int rctl_rule_fully_specified(const struct rctl_rule *rule); 226 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); 227 228 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); 229 230 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS) 231 { 232 int val = rctl_throttle_min; 233 int error; 234 235 error = sysctl_handle_int(oidp, &val, 0, req); 236 if (error || !req->newptr) 237 return (error); 238 if (val < 1 || val > rctl_throttle_max) 239 return (EINVAL); 240 241 RCTL_WLOCK(); 242 rctl_throttle_min = val; 243 RCTL_WUNLOCK(); 244 245 return (0); 246 } 247 248 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS) 249 { 250 int val = rctl_throttle_max; 251 int error; 252 253 error = sysctl_handle_int(oidp, &val, 0, req); 254 if (error || !req->newptr) 255 return (error); 256 if (val < rctl_throttle_min) 257 return (EINVAL); 258 259 RCTL_WLOCK(); 260 rctl_throttle_max = val; 261 RCTL_WUNLOCK(); 262 263 return (0); 264 } 265 266 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS) 267 { 268 int val = rctl_throttle_pct; 269 int error; 270 271 error = sysctl_handle_int(oidp, &val, 0, req); 272 if (error || !req->newptr) 273 return (error); 274 if (val < 0) 275 return (EINVAL); 276 277 RCTL_WLOCK(); 278 rctl_throttle_pct = val; 279 RCTL_WUNLOCK(); 280 281 return (0); 282 } 283 284 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS) 285 { 286 int val = rctl_throttle_pct2; 287 int error; 288 289 error = sysctl_handle_int(oidp, &val, 0, req); 290 if (error || !req->newptr) 291 return (error); 292 if (val < 0) 293 return (EINVAL); 294 295 RCTL_WLOCK(); 296 rctl_throttle_pct2 = val; 297 RCTL_WUNLOCK(); 298 299 return (0); 300 } 301 302 static const char * 303 rctl_subject_type_name(int subject) 304 { 305 int i; 306 307 for (i = 0; subjectnames[i].d_name != NULL; i++) { 308 if (subjectnames[i].d_value == subject) 309 return (subjectnames[i].d_name); 310 } 311 312 panic("rctl_subject_type_name: unknown subject type %d", subject); 313 } 314 315 static const char * 316 rctl_action_name(int action) 317 { 318 int i; 319 320 for (i = 0; actionnames[i].d_name != NULL; i++) { 321 if (actionnames[i].d_value == action) 322 return (actionnames[i].d_name); 323 } 324 325 panic("rctl_action_name: unknown action %d", action); 326 } 327 328 const char * 329 rctl_resource_name(int resource) 330 { 331 int i; 332 333 for (i = 0; resourcenames[i].d_name != NULL; i++) { 334 if (resourcenames[i].d_value == resource) 335 return (resourcenames[i].d_name); 336 } 337 338 panic("rctl_resource_name: unknown resource %d", resource); 339 } 340 341 static struct racct * 342 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule) 343 { 344 struct ucred *cred = p->p_ucred; 345 346 ASSERT_RACCT_ENABLED(); 347 RCTL_LOCK_ASSERT(); 348 349 switch (rule->rr_per) { 350 case RCTL_SUBJECT_TYPE_PROCESS: 351 return (p->p_racct); 352 case RCTL_SUBJECT_TYPE_USER: 353 return (cred->cr_ruidinfo->ui_racct); 354 case RCTL_SUBJECT_TYPE_LOGINCLASS: 355 return (cred->cr_loginclass->lc_racct); 356 case RCTL_SUBJECT_TYPE_JAIL: 357 return (cred->cr_prison->pr_prison_racct->prr_racct); 358 default: 359 panic("%s: unknown per %d", __func__, rule->rr_per); 360 } 361 } 362 363 /* 364 * Return the amount of resource that can be allocated by 'p' before 365 * hitting 'rule'. 366 */ 367 static int64_t 368 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) 369 { 370 int64_t available; 371 const struct racct *racct; 372 373 ASSERT_RACCT_ENABLED(); 374 RCTL_LOCK_ASSERT(); 375 376 racct = rctl_proc_rule_to_racct(p, rule); 377 available = rule->rr_amount - racct->r_resources[rule->rr_resource]; 378 379 return (available); 380 } 381 382 /* 383 * Called every second for proc, uidinfo, loginclass, and jail containers. 384 * If the limit isn't exceeded, it decreases the usage amount to zero. 385 * Otherwise, it decreases it by the value of the limit. This way 386 * resource consumption exceeding the limit "carries over" to the next 387 * period. 388 */ 389 void 390 rctl_throttle_decay(struct racct *racct, int resource) 391 { 392 struct rctl_rule *rule; 393 struct rctl_rule_link *link; 394 int64_t minavailable; 395 396 ASSERT_RACCT_ENABLED(); 397 398 minavailable = INT64_MAX; 399 400 RCTL_RLOCK(); 401 402 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 403 rule = link->rrl_rule; 404 405 if (rule->rr_resource != resource) 406 continue; 407 if (rule->rr_action != RCTL_ACTION_THROTTLE) 408 continue; 409 410 if (rule->rr_amount < minavailable) 411 minavailable = rule->rr_amount; 412 } 413 414 RCTL_RUNLOCK(); 415 416 if (racct->r_resources[resource] < minavailable) { 417 racct->r_resources[resource] = 0; 418 } else { 419 /* 420 * Cap utilization counter at ten times the limit. Otherwise, 421 * if we changed the rule lowering the allowed amount, it could 422 * take unreasonably long time for the accumulated resource 423 * usage to drop. 424 */ 425 if (racct->r_resources[resource] > minavailable * 10) 426 racct->r_resources[resource] = minavailable * 10; 427 428 racct->r_resources[resource] -= minavailable; 429 } 430 } 431 432 /* 433 * Special version of rctl_get_available() for the %CPU resource. 434 * We slightly cheat here and return less than we normally would. 435 */ 436 int64_t 437 rctl_pcpu_available(const struct proc *p) { 438 struct rctl_rule *rule; 439 struct rctl_rule_link *link; 440 int64_t available, minavailable, limit; 441 442 ASSERT_RACCT_ENABLED(); 443 444 minavailable = INT64_MAX; 445 limit = 0; 446 447 RCTL_RLOCK(); 448 449 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 450 rule = link->rrl_rule; 451 if (rule->rr_resource != RACCT_PCTCPU) 452 continue; 453 if (rule->rr_action != RCTL_ACTION_DENY) 454 continue; 455 available = rctl_available_resource(p, rule); 456 if (available < minavailable) { 457 minavailable = available; 458 limit = rule->rr_amount; 459 } 460 } 461 462 RCTL_RUNLOCK(); 463 464 /* 465 * Return slightly less than actual value of the available 466 * %cpu resource. This makes %cpu throttling more agressive 467 * and lets us act sooner than the limits are already exceeded. 468 */ 469 if (limit != 0) { 470 if (limit > 2 * RCTL_PCPU_SHIFT) 471 minavailable -= RCTL_PCPU_SHIFT; 472 else 473 minavailable -= (limit / 2); 474 } 475 476 return (minavailable); 477 } 478 479 static uint64_t 480 xadd(uint64_t a, uint64_t b) 481 { 482 uint64_t c; 483 484 c = a + b; 485 486 /* 487 * Detect overflow. 488 */ 489 if (c < a || c < b) 490 return (UINT64_MAX); 491 492 return (c); 493 } 494 495 static uint64_t 496 xmul(uint64_t a, uint64_t b) 497 { 498 499 if (b != 0 && a > UINT64_MAX / b) 500 return (UINT64_MAX); 501 502 return (a * b); 503 } 504 505 /* 506 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition 507 * to what it keeps allocated now. Returns non-zero if the allocation should 508 * be denied, 0 otherwise. 509 */ 510 int 511 rctl_enforce(struct proc *p, int resource, uint64_t amount) 512 { 513 static struct timeval log_lasttime, devctl_lasttime; 514 static int log_curtime = 0, devctl_curtime = 0; 515 struct rctl_rule *rule; 516 struct rctl_rule_link *link; 517 struct sbuf sb; 518 int64_t available; 519 uint64_t sleep_ms, sleep_ratio; 520 int should_deny = 0; 521 char *buf; 522 523 524 ASSERT_RACCT_ENABLED(); 525 526 RCTL_RLOCK(); 527 528 /* 529 * There may be more than one matching rule; go through all of them. 530 * Denial should be done last, after logging and sending signals. 531 */ 532 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 533 rule = link->rrl_rule; 534 if (rule->rr_resource != resource) 535 continue; 536 537 available = rctl_available_resource(p, rule); 538 if (available >= (int64_t)amount) { 539 link->rrl_exceeded = 0; 540 continue; 541 } 542 543 switch (rule->rr_action) { 544 case RCTL_ACTION_DENY: 545 should_deny = 1; 546 continue; 547 case RCTL_ACTION_LOG: 548 /* 549 * If rrl_exceeded != 0, it means we've already 550 * logged a warning for this process. 551 */ 552 if (link->rrl_exceeded != 0) 553 continue; 554 555 /* 556 * If the process state is not fully initialized yet, 557 * we can't access most of the required fields, e.g. 558 * p->p_comm. This happens when called from fork1(). 559 * Ignore this rule for now; it will be processed just 560 * after fork, when called from racct_proc_fork_done(). 561 */ 562 if (p->p_state != PRS_NORMAL) 563 continue; 564 565 if (!ppsratecheck(&log_lasttime, &log_curtime, 566 rctl_log_rate_limit)) 567 continue; 568 569 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 570 if (buf == NULL) { 571 printf("rctl_enforce: out of memory\n"); 572 continue; 573 } 574 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 575 rctl_rule_to_sbuf(&sb, rule); 576 sbuf_finish(&sb); 577 printf("rctl: rule \"%s\" matched by pid %d " 578 "(%s), uid %d, jail %s\n", sbuf_data(&sb), 579 p->p_pid, p->p_comm, p->p_ucred->cr_uid, 580 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 581 sbuf_delete(&sb); 582 free(buf, M_RCTL); 583 link->rrl_exceeded = 1; 584 continue; 585 case RCTL_ACTION_DEVCTL: 586 if (link->rrl_exceeded != 0) 587 continue; 588 589 if (p->p_state != PRS_NORMAL) 590 continue; 591 592 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime, 593 rctl_devctl_rate_limit)) 594 continue; 595 596 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); 597 if (buf == NULL) { 598 printf("rctl_enforce: out of memory\n"); 599 continue; 600 } 601 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); 602 sbuf_printf(&sb, "rule="); 603 rctl_rule_to_sbuf(&sb, rule); 604 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", 605 p->p_pid, p->p_ucred->cr_ruid, 606 p->p_ucred->cr_prison->pr_prison_racct->prr_name); 607 sbuf_finish(&sb); 608 devctl_notify_f("RCTL", "rule", "matched", 609 sbuf_data(&sb), M_NOWAIT); 610 sbuf_delete(&sb); 611 free(buf, M_RCTL); 612 link->rrl_exceeded = 1; 613 continue; 614 case RCTL_ACTION_THROTTLE: 615 if (p->p_state != PRS_NORMAL) 616 continue; 617 618 /* 619 * Make the process sleep for a fraction of second 620 * proportional to the ratio of process' resource 621 * utilization compared to the limit. The point is 622 * to penalize resource hogs: processes that consume 623 * more of the available resources sleep for longer. 624 * 625 * We're trying to defer division until the very end, 626 * to minimize the rounding effects. The following 627 * calculation could have been written in a clearer 628 * way like this: 629 * 630 * sleep_ms = hz * p->p_racct->r_resources[resource] / 631 * rule->rr_amount; 632 * sleep_ms *= rctl_throttle_pct / 100; 633 * if (sleep_ms < rctl_throttle_min) 634 * sleep_ms = rctl_throttle_min; 635 * 636 */ 637 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]); 638 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100; 639 if (sleep_ms < rctl_throttle_min * rule->rr_amount) 640 sleep_ms = rctl_throttle_min * rule->rr_amount; 641 642 /* 643 * Multiply that by the ratio of the resource 644 * consumption for the container compared to the limit, 645 * squared. In other words, a process in a container 646 * that is two times over the limit will be throttled 647 * four times as much for hitting the same rule. The 648 * point is to penalize processes more if the container 649 * itself (eg certain UID or jail) is above the limit. 650 */ 651 if (available < 0) 652 sleep_ratio = -available / rule->rr_amount; 653 else 654 sleep_ratio = 0; 655 sleep_ratio = xmul(sleep_ratio, sleep_ratio); 656 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100; 657 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio)); 658 659 /* 660 * Finally the division. 661 */ 662 sleep_ms /= rule->rr_amount; 663 664 if (sleep_ms > rctl_throttle_max) 665 sleep_ms = rctl_throttle_max; 666 #if 0 667 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ld ms (ratio %ld, available %ld)\n", 668 __func__, p->p_pid, p->p_comm, 669 p->p_racct->r_resources[resource], 670 rule->rr_amount, sleep_ms, sleep_ratio, available); 671 #endif 672 673 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n", 674 __func__, (uintmax_t)sleep_ms, rctl_throttle_min)); 675 racct_proc_throttle(p, sleep_ms); 676 continue; 677 default: 678 if (link->rrl_exceeded != 0) 679 continue; 680 681 if (p->p_state != PRS_NORMAL) 682 continue; 683 684 KASSERT(rule->rr_action > 0 && 685 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, 686 ("rctl_enforce: unknown action %d", 687 rule->rr_action)); 688 689 /* 690 * We're using the fact that RCTL_ACTION_SIG* values 691 * are equal to their counterparts from sys/signal.h. 692 */ 693 kern_psignal(p, rule->rr_action); 694 link->rrl_exceeded = 1; 695 continue; 696 } 697 } 698 699 RCTL_RUNLOCK(); 700 701 if (should_deny) { 702 /* 703 * Return fake error code; the caller should change it 704 * into one proper for the situation - EFSIZ, ENOMEM etc. 705 */ 706 return (EDOOFUS); 707 } 708 709 return (0); 710 } 711 712 uint64_t 713 rctl_get_limit(struct proc *p, int resource) 714 { 715 struct rctl_rule *rule; 716 struct rctl_rule_link *link; 717 uint64_t amount = UINT64_MAX; 718 719 ASSERT_RACCT_ENABLED(); 720 721 RCTL_RLOCK(); 722 723 /* 724 * There may be more than one matching rule; go through all of them. 725 * Denial should be done last, after logging and sending signals. 726 */ 727 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 728 rule = link->rrl_rule; 729 if (rule->rr_resource != resource) 730 continue; 731 if (rule->rr_action != RCTL_ACTION_DENY) 732 continue; 733 if (rule->rr_amount < amount) 734 amount = rule->rr_amount; 735 } 736 737 RCTL_RUNLOCK(); 738 739 return (amount); 740 } 741 742 uint64_t 743 rctl_get_available(struct proc *p, int resource) 744 { 745 struct rctl_rule *rule; 746 struct rctl_rule_link *link; 747 int64_t available, minavailable, allocated; 748 749 minavailable = INT64_MAX; 750 751 ASSERT_RACCT_ENABLED(); 752 753 RCTL_RLOCK(); 754 755 /* 756 * There may be more than one matching rule; go through all of them. 757 * Denial should be done last, after logging and sending signals. 758 */ 759 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 760 rule = link->rrl_rule; 761 if (rule->rr_resource != resource) 762 continue; 763 if (rule->rr_action != RCTL_ACTION_DENY) 764 continue; 765 available = rctl_available_resource(p, rule); 766 if (available < minavailable) 767 minavailable = available; 768 } 769 770 RCTL_RUNLOCK(); 771 772 /* 773 * XXX: Think about this _hard_. 774 */ 775 allocated = p->p_racct->r_resources[resource]; 776 if (minavailable < INT64_MAX - allocated) 777 minavailable += allocated; 778 if (minavailable < 0) 779 minavailable = 0; 780 return (minavailable); 781 } 782 783 static int 784 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) 785 { 786 787 ASSERT_RACCT_ENABLED(); 788 789 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { 790 if (rule->rr_subject_type != filter->rr_subject_type) 791 return (0); 792 793 switch (filter->rr_subject_type) { 794 case RCTL_SUBJECT_TYPE_PROCESS: 795 if (filter->rr_subject.rs_proc != NULL && 796 rule->rr_subject.rs_proc != 797 filter->rr_subject.rs_proc) 798 return (0); 799 break; 800 case RCTL_SUBJECT_TYPE_USER: 801 if (filter->rr_subject.rs_uip != NULL && 802 rule->rr_subject.rs_uip != 803 filter->rr_subject.rs_uip) 804 return (0); 805 break; 806 case RCTL_SUBJECT_TYPE_LOGINCLASS: 807 if (filter->rr_subject.rs_loginclass != NULL && 808 rule->rr_subject.rs_loginclass != 809 filter->rr_subject.rs_loginclass) 810 return (0); 811 break; 812 case RCTL_SUBJECT_TYPE_JAIL: 813 if (filter->rr_subject.rs_prison_racct != NULL && 814 rule->rr_subject.rs_prison_racct != 815 filter->rr_subject.rs_prison_racct) 816 return (0); 817 break; 818 default: 819 panic("rctl_rule_matches: unknown subject type %d", 820 filter->rr_subject_type); 821 } 822 } 823 824 if (filter->rr_resource != RACCT_UNDEFINED) { 825 if (rule->rr_resource != filter->rr_resource) 826 return (0); 827 } 828 829 if (filter->rr_action != RCTL_ACTION_UNDEFINED) { 830 if (rule->rr_action != filter->rr_action) 831 return (0); 832 } 833 834 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { 835 if (rule->rr_amount != filter->rr_amount) 836 return (0); 837 } 838 839 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { 840 if (rule->rr_per != filter->rr_per) 841 return (0); 842 } 843 844 return (1); 845 } 846 847 static int 848 str2value(const char *str, int *value, struct dict *table) 849 { 850 int i; 851 852 if (value == NULL) 853 return (EINVAL); 854 855 for (i = 0; table[i].d_name != NULL; i++) { 856 if (strcasecmp(table[i].d_name, str) == 0) { 857 *value = table[i].d_value; 858 return (0); 859 } 860 } 861 862 return (EINVAL); 863 } 864 865 static int 866 str2id(const char *str, id_t *value) 867 { 868 char *end; 869 870 if (str == NULL) 871 return (EINVAL); 872 873 *value = strtoul(str, &end, 10); 874 if ((size_t)(end - str) != strlen(str)) 875 return (EINVAL); 876 877 return (0); 878 } 879 880 static int 881 str2int64(const char *str, int64_t *value) 882 { 883 char *end; 884 885 if (str == NULL) 886 return (EINVAL); 887 888 *value = strtoul(str, &end, 10); 889 if ((size_t)(end - str) != strlen(str)) 890 return (EINVAL); 891 892 if (*value < 0) 893 return (ERANGE); 894 895 return (0); 896 } 897 898 /* 899 * Connect the rule to the racct, increasing refcount for the rule. 900 */ 901 static void 902 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) 903 { 904 struct rctl_rule_link *link; 905 906 ASSERT_RACCT_ENABLED(); 907 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 908 909 rctl_rule_acquire(rule); 910 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 911 link->rrl_rule = rule; 912 link->rrl_exceeded = 0; 913 914 RCTL_WLOCK(); 915 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 916 RCTL_WUNLOCK(); 917 } 918 919 static int 920 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) 921 { 922 struct rctl_rule_link *link; 923 924 ASSERT_RACCT_ENABLED(); 925 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 926 RCTL_WLOCK_ASSERT(); 927 928 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); 929 if (link == NULL) 930 return (ENOMEM); 931 rctl_rule_acquire(rule); 932 link->rrl_rule = rule; 933 link->rrl_exceeded = 0; 934 935 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); 936 return (0); 937 } 938 939 /* 940 * Remove limits for a rules matching the filter and release 941 * the refcounts for the rules, possibly freeing them. Returns 942 * the number of limit structures removed. 943 */ 944 static int 945 rctl_racct_remove_rules(struct racct *racct, 946 const struct rctl_rule *filter) 947 { 948 int removed = 0; 949 struct rctl_rule_link *link, *linktmp; 950 951 ASSERT_RACCT_ENABLED(); 952 RCTL_WLOCK_ASSERT(); 953 954 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { 955 if (!rctl_rule_matches(link->rrl_rule, filter)) 956 continue; 957 958 LIST_REMOVE(link, rrl_next); 959 rctl_rule_release(link->rrl_rule); 960 uma_zfree(rctl_rule_link_zone, link); 961 removed++; 962 } 963 return (removed); 964 } 965 966 static void 967 rctl_rule_acquire_subject(struct rctl_rule *rule) 968 { 969 970 ASSERT_RACCT_ENABLED(); 971 972 switch (rule->rr_subject_type) { 973 case RCTL_SUBJECT_TYPE_UNDEFINED: 974 case RCTL_SUBJECT_TYPE_PROCESS: 975 break; 976 case RCTL_SUBJECT_TYPE_JAIL: 977 if (rule->rr_subject.rs_prison_racct != NULL) 978 prison_racct_hold(rule->rr_subject.rs_prison_racct); 979 break; 980 case RCTL_SUBJECT_TYPE_USER: 981 if (rule->rr_subject.rs_uip != NULL) 982 uihold(rule->rr_subject.rs_uip); 983 break; 984 case RCTL_SUBJECT_TYPE_LOGINCLASS: 985 if (rule->rr_subject.rs_loginclass != NULL) 986 loginclass_hold(rule->rr_subject.rs_loginclass); 987 break; 988 default: 989 panic("rctl_rule_acquire_subject: unknown subject type %d", 990 rule->rr_subject_type); 991 } 992 } 993 994 static void 995 rctl_rule_release_subject(struct rctl_rule *rule) 996 { 997 998 ASSERT_RACCT_ENABLED(); 999 1000 switch (rule->rr_subject_type) { 1001 case RCTL_SUBJECT_TYPE_UNDEFINED: 1002 case RCTL_SUBJECT_TYPE_PROCESS: 1003 break; 1004 case RCTL_SUBJECT_TYPE_JAIL: 1005 if (rule->rr_subject.rs_prison_racct != NULL) 1006 prison_racct_free(rule->rr_subject.rs_prison_racct); 1007 break; 1008 case RCTL_SUBJECT_TYPE_USER: 1009 if (rule->rr_subject.rs_uip != NULL) 1010 uifree(rule->rr_subject.rs_uip); 1011 break; 1012 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1013 if (rule->rr_subject.rs_loginclass != NULL) 1014 loginclass_free(rule->rr_subject.rs_loginclass); 1015 break; 1016 default: 1017 panic("rctl_rule_release_subject: unknown subject type %d", 1018 rule->rr_subject_type); 1019 } 1020 } 1021 1022 struct rctl_rule * 1023 rctl_rule_alloc(int flags) 1024 { 1025 struct rctl_rule *rule; 1026 1027 ASSERT_RACCT_ENABLED(); 1028 1029 rule = uma_zalloc(rctl_rule_zone, flags); 1030 if (rule == NULL) 1031 return (NULL); 1032 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1033 rule->rr_subject.rs_proc = NULL; 1034 rule->rr_subject.rs_uip = NULL; 1035 rule->rr_subject.rs_loginclass = NULL; 1036 rule->rr_subject.rs_prison_racct = NULL; 1037 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1038 rule->rr_resource = RACCT_UNDEFINED; 1039 rule->rr_action = RCTL_ACTION_UNDEFINED; 1040 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1041 refcount_init(&rule->rr_refcount, 1); 1042 1043 return (rule); 1044 } 1045 1046 struct rctl_rule * 1047 rctl_rule_duplicate(const struct rctl_rule *rule, int flags) 1048 { 1049 struct rctl_rule *copy; 1050 1051 ASSERT_RACCT_ENABLED(); 1052 1053 copy = uma_zalloc(rctl_rule_zone, flags); 1054 if (copy == NULL) 1055 return (NULL); 1056 copy->rr_subject_type = rule->rr_subject_type; 1057 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; 1058 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; 1059 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; 1060 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; 1061 copy->rr_per = rule->rr_per; 1062 copy->rr_resource = rule->rr_resource; 1063 copy->rr_action = rule->rr_action; 1064 copy->rr_amount = rule->rr_amount; 1065 refcount_init(©->rr_refcount, 1); 1066 rctl_rule_acquire_subject(copy); 1067 1068 return (copy); 1069 } 1070 1071 void 1072 rctl_rule_acquire(struct rctl_rule *rule) 1073 { 1074 1075 ASSERT_RACCT_ENABLED(); 1076 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1077 1078 refcount_acquire(&rule->rr_refcount); 1079 } 1080 1081 static void 1082 rctl_rule_free(void *context, int pending) 1083 { 1084 struct rctl_rule *rule; 1085 1086 rule = (struct rctl_rule *)context; 1087 1088 ASSERT_RACCT_ENABLED(); 1089 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); 1090 1091 /* 1092 * We don't need locking here; rule is guaranteed to be inaccessible. 1093 */ 1094 1095 rctl_rule_release_subject(rule); 1096 uma_zfree(rctl_rule_zone, rule); 1097 } 1098 1099 void 1100 rctl_rule_release(struct rctl_rule *rule) 1101 { 1102 1103 ASSERT_RACCT_ENABLED(); 1104 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); 1105 1106 if (refcount_release(&rule->rr_refcount)) { 1107 /* 1108 * rctl_rule_release() is often called when iterating 1109 * over all the uidinfo structures in the system, 1110 * holding uihashtbl_lock. Since rctl_rule_free() 1111 * might end up calling uifree(), this would lead 1112 * to lock recursion. Use taskqueue to avoid this. 1113 */ 1114 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); 1115 taskqueue_enqueue(taskqueue_thread, &rule->rr_task); 1116 } 1117 } 1118 1119 static int 1120 rctl_rule_fully_specified(const struct rctl_rule *rule) 1121 { 1122 1123 ASSERT_RACCT_ENABLED(); 1124 1125 switch (rule->rr_subject_type) { 1126 case RCTL_SUBJECT_TYPE_UNDEFINED: 1127 return (0); 1128 case RCTL_SUBJECT_TYPE_PROCESS: 1129 if (rule->rr_subject.rs_proc == NULL) 1130 return (0); 1131 break; 1132 case RCTL_SUBJECT_TYPE_USER: 1133 if (rule->rr_subject.rs_uip == NULL) 1134 return (0); 1135 break; 1136 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1137 if (rule->rr_subject.rs_loginclass == NULL) 1138 return (0); 1139 break; 1140 case RCTL_SUBJECT_TYPE_JAIL: 1141 if (rule->rr_subject.rs_prison_racct == NULL) 1142 return (0); 1143 break; 1144 default: 1145 panic("rctl_rule_fully_specified: unknown subject type %d", 1146 rule->rr_subject_type); 1147 } 1148 if (rule->rr_resource == RACCT_UNDEFINED) 1149 return (0); 1150 if (rule->rr_action == RCTL_ACTION_UNDEFINED) 1151 return (0); 1152 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) 1153 return (0); 1154 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) 1155 return (0); 1156 1157 return (1); 1158 } 1159 1160 static int 1161 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) 1162 { 1163 int error = 0; 1164 char *subjectstr, *subject_idstr, *resourcestr, *actionstr, 1165 *amountstr, *perstr; 1166 struct rctl_rule *rule; 1167 id_t id; 1168 1169 ASSERT_RACCT_ENABLED(); 1170 1171 rule = rctl_rule_alloc(M_WAITOK); 1172 1173 subjectstr = strsep(&rulestr, ":"); 1174 subject_idstr = strsep(&rulestr, ":"); 1175 resourcestr = strsep(&rulestr, ":"); 1176 actionstr = strsep(&rulestr, "=/"); 1177 amountstr = strsep(&rulestr, "/"); 1178 perstr = rulestr; 1179 1180 if (subjectstr == NULL || subjectstr[0] == '\0') 1181 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; 1182 else { 1183 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); 1184 if (error != 0) 1185 goto out; 1186 } 1187 1188 if (subject_idstr == NULL || subject_idstr[0] == '\0') { 1189 rule->rr_subject.rs_proc = NULL; 1190 rule->rr_subject.rs_uip = NULL; 1191 rule->rr_subject.rs_loginclass = NULL; 1192 rule->rr_subject.rs_prison_racct = NULL; 1193 } else { 1194 switch (rule->rr_subject_type) { 1195 case RCTL_SUBJECT_TYPE_UNDEFINED: 1196 error = EINVAL; 1197 goto out; 1198 case RCTL_SUBJECT_TYPE_PROCESS: 1199 error = str2id(subject_idstr, &id); 1200 if (error != 0) 1201 goto out; 1202 sx_assert(&allproc_lock, SA_LOCKED); 1203 rule->rr_subject.rs_proc = pfind(id); 1204 if (rule->rr_subject.rs_proc == NULL) { 1205 error = ESRCH; 1206 goto out; 1207 } 1208 PROC_UNLOCK(rule->rr_subject.rs_proc); 1209 break; 1210 case RCTL_SUBJECT_TYPE_USER: 1211 error = str2id(subject_idstr, &id); 1212 if (error != 0) 1213 goto out; 1214 rule->rr_subject.rs_uip = uifind(id); 1215 break; 1216 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1217 rule->rr_subject.rs_loginclass = 1218 loginclass_find(subject_idstr); 1219 if (rule->rr_subject.rs_loginclass == NULL) { 1220 error = ENAMETOOLONG; 1221 goto out; 1222 } 1223 break; 1224 case RCTL_SUBJECT_TYPE_JAIL: 1225 rule->rr_subject.rs_prison_racct = 1226 prison_racct_find(subject_idstr); 1227 if (rule->rr_subject.rs_prison_racct == NULL) { 1228 error = ENAMETOOLONG; 1229 goto out; 1230 } 1231 break; 1232 default: 1233 panic("rctl_string_to_rule: unknown subject type %d", 1234 rule->rr_subject_type); 1235 } 1236 } 1237 1238 if (resourcestr == NULL || resourcestr[0] == '\0') 1239 rule->rr_resource = RACCT_UNDEFINED; 1240 else { 1241 error = str2value(resourcestr, &rule->rr_resource, 1242 resourcenames); 1243 if (error != 0) 1244 goto out; 1245 } 1246 1247 if (actionstr == NULL || actionstr[0] == '\0') 1248 rule->rr_action = RCTL_ACTION_UNDEFINED; 1249 else { 1250 error = str2value(actionstr, &rule->rr_action, actionnames); 1251 if (error != 0) 1252 goto out; 1253 } 1254 1255 if (amountstr == NULL || amountstr[0] == '\0') 1256 rule->rr_amount = RCTL_AMOUNT_UNDEFINED; 1257 else { 1258 error = str2int64(amountstr, &rule->rr_amount); 1259 if (error != 0) 1260 goto out; 1261 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) { 1262 if (rule->rr_amount > INT64_MAX / 1000000) { 1263 error = ERANGE; 1264 goto out; 1265 } 1266 rule->rr_amount *= 1000000; 1267 } 1268 } 1269 1270 if (perstr == NULL || perstr[0] == '\0') 1271 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; 1272 else { 1273 error = str2value(perstr, &rule->rr_per, subjectnames); 1274 if (error != 0) 1275 goto out; 1276 } 1277 1278 out: 1279 if (error == 0) 1280 *rulep = rule; 1281 else 1282 rctl_rule_release(rule); 1283 1284 return (error); 1285 } 1286 1287 /* 1288 * Link a rule with all the subjects it applies to. 1289 */ 1290 int 1291 rctl_rule_add(struct rctl_rule *rule) 1292 { 1293 struct proc *p; 1294 struct ucred *cred; 1295 struct uidinfo *uip; 1296 struct prison *pr; 1297 struct prison_racct *prr; 1298 struct loginclass *lc; 1299 struct rctl_rule *rule2; 1300 int match; 1301 1302 ASSERT_RACCT_ENABLED(); 1303 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 1304 1305 /* 1306 * Some rules just don't make sense, like "deny" rule for an undeniable 1307 * resource. The exception are the RSS and %CPU resources - they are 1308 * not deniable in the racct sense, but the limit is enforced in 1309 * a different way. 1310 */ 1311 if (rule->rr_action == RCTL_ACTION_DENY && 1312 !RACCT_IS_DENIABLE(rule->rr_resource) && 1313 rule->rr_resource != RACCT_RSS && 1314 rule->rr_resource != RACCT_PCTCPU) { 1315 return (EOPNOTSUPP); 1316 } 1317 1318 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1319 !RACCT_IS_DECAYING(rule->rr_resource)) { 1320 return (EOPNOTSUPP); 1321 } 1322 1323 if (rule->rr_action == RCTL_ACTION_THROTTLE && 1324 rule->rr_resource == RACCT_PCTCPU) { 1325 return (EOPNOTSUPP); 1326 } 1327 1328 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && 1329 RACCT_IS_SLOPPY(rule->rr_resource)) { 1330 return (EOPNOTSUPP); 1331 } 1332 1333 /* 1334 * Make sure there are no duplicated rules. Also, for the "deny" 1335 * rules, remove ones differing only by "amount". 1336 */ 1337 if (rule->rr_action == RCTL_ACTION_DENY) { 1338 rule2 = rctl_rule_duplicate(rule, M_WAITOK); 1339 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; 1340 rctl_rule_remove(rule2); 1341 rctl_rule_release(rule2); 1342 } else 1343 rctl_rule_remove(rule); 1344 1345 switch (rule->rr_subject_type) { 1346 case RCTL_SUBJECT_TYPE_PROCESS: 1347 p = rule->rr_subject.rs_proc; 1348 KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); 1349 1350 rctl_racct_add_rule(p->p_racct, rule); 1351 /* 1352 * In case of per-process rule, we don't have anything more 1353 * to do. 1354 */ 1355 return (0); 1356 1357 case RCTL_SUBJECT_TYPE_USER: 1358 uip = rule->rr_subject.rs_uip; 1359 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); 1360 rctl_racct_add_rule(uip->ui_racct, rule); 1361 break; 1362 1363 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1364 lc = rule->rr_subject.rs_loginclass; 1365 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); 1366 rctl_racct_add_rule(lc->lc_racct, rule); 1367 break; 1368 1369 case RCTL_SUBJECT_TYPE_JAIL: 1370 prr = rule->rr_subject.rs_prison_racct; 1371 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); 1372 rctl_racct_add_rule(prr->prr_racct, rule); 1373 break; 1374 1375 default: 1376 panic("rctl_rule_add: unknown subject type %d", 1377 rule->rr_subject_type); 1378 } 1379 1380 /* 1381 * Now go through all the processes and add the new rule to the ones 1382 * it applies to. 1383 */ 1384 sx_assert(&allproc_lock, SA_LOCKED); 1385 FOREACH_PROC_IN_SYSTEM(p) { 1386 cred = p->p_ucred; 1387 switch (rule->rr_subject_type) { 1388 case RCTL_SUBJECT_TYPE_USER: 1389 if (cred->cr_uidinfo == rule->rr_subject.rs_uip || 1390 cred->cr_ruidinfo == rule->rr_subject.rs_uip) 1391 break; 1392 continue; 1393 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1394 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) 1395 break; 1396 continue; 1397 case RCTL_SUBJECT_TYPE_JAIL: 1398 match = 0; 1399 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { 1400 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { 1401 match = 1; 1402 break; 1403 } 1404 } 1405 if (match) 1406 break; 1407 continue; 1408 default: 1409 panic("rctl_rule_add: unknown subject type %d", 1410 rule->rr_subject_type); 1411 } 1412 1413 rctl_racct_add_rule(p->p_racct, rule); 1414 } 1415 1416 return (0); 1417 } 1418 1419 static void 1420 rctl_rule_pre_callback(void) 1421 { 1422 1423 RCTL_WLOCK(); 1424 } 1425 1426 static void 1427 rctl_rule_post_callback(void) 1428 { 1429 1430 RCTL_WUNLOCK(); 1431 } 1432 1433 static void 1434 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) 1435 { 1436 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1437 int found = 0; 1438 1439 ASSERT_RACCT_ENABLED(); 1440 RCTL_WLOCK_ASSERT(); 1441 1442 found += rctl_racct_remove_rules(racct, filter); 1443 1444 *((int *)arg3) += found; 1445 } 1446 1447 /* 1448 * Remove all rules that match the filter. 1449 */ 1450 int 1451 rctl_rule_remove(struct rctl_rule *filter) 1452 { 1453 int found = 0; 1454 struct proc *p; 1455 1456 ASSERT_RACCT_ENABLED(); 1457 1458 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && 1459 filter->rr_subject.rs_proc != NULL) { 1460 p = filter->rr_subject.rs_proc; 1461 RCTL_WLOCK(); 1462 found = rctl_racct_remove_rules(p->p_racct, filter); 1463 RCTL_WUNLOCK(); 1464 if (found) 1465 return (0); 1466 return (ESRCH); 1467 } 1468 1469 loginclass_racct_foreach(rctl_rule_remove_callback, 1470 rctl_rule_pre_callback, rctl_rule_post_callback, 1471 filter, (void *)&found); 1472 ui_racct_foreach(rctl_rule_remove_callback, 1473 rctl_rule_pre_callback, rctl_rule_post_callback, 1474 filter, (void *)&found); 1475 prison_racct_foreach(rctl_rule_remove_callback, 1476 rctl_rule_pre_callback, rctl_rule_post_callback, 1477 filter, (void *)&found); 1478 1479 sx_assert(&allproc_lock, SA_LOCKED); 1480 RCTL_WLOCK(); 1481 FOREACH_PROC_IN_SYSTEM(p) { 1482 found += rctl_racct_remove_rules(p->p_racct, filter); 1483 } 1484 RCTL_WUNLOCK(); 1485 1486 if (found) 1487 return (0); 1488 return (ESRCH); 1489 } 1490 1491 /* 1492 * Appends a rule to the sbuf. 1493 */ 1494 static void 1495 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) 1496 { 1497 int64_t amount; 1498 1499 ASSERT_RACCT_ENABLED(); 1500 1501 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); 1502 1503 switch (rule->rr_subject_type) { 1504 case RCTL_SUBJECT_TYPE_PROCESS: 1505 if (rule->rr_subject.rs_proc == NULL) 1506 sbuf_printf(sb, ":"); 1507 else 1508 sbuf_printf(sb, "%d:", 1509 rule->rr_subject.rs_proc->p_pid); 1510 break; 1511 case RCTL_SUBJECT_TYPE_USER: 1512 if (rule->rr_subject.rs_uip == NULL) 1513 sbuf_printf(sb, ":"); 1514 else 1515 sbuf_printf(sb, "%d:", 1516 rule->rr_subject.rs_uip->ui_uid); 1517 break; 1518 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1519 if (rule->rr_subject.rs_loginclass == NULL) 1520 sbuf_printf(sb, ":"); 1521 else 1522 sbuf_printf(sb, "%s:", 1523 rule->rr_subject.rs_loginclass->lc_name); 1524 break; 1525 case RCTL_SUBJECT_TYPE_JAIL: 1526 if (rule->rr_subject.rs_prison_racct == NULL) 1527 sbuf_printf(sb, ":"); 1528 else 1529 sbuf_printf(sb, "%s:", 1530 rule->rr_subject.rs_prison_racct->prr_name); 1531 break; 1532 default: 1533 panic("rctl_rule_to_sbuf: unknown subject type %d", 1534 rule->rr_subject_type); 1535 } 1536 1537 amount = rule->rr_amount; 1538 if (amount != RCTL_AMOUNT_UNDEFINED && 1539 RACCT_IS_IN_MILLIONS(rule->rr_resource)) 1540 amount /= 1000000; 1541 1542 sbuf_printf(sb, "%s:%s=%jd", 1543 rctl_resource_name(rule->rr_resource), 1544 rctl_action_name(rule->rr_action), 1545 amount); 1546 1547 if (rule->rr_per != rule->rr_subject_type) 1548 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); 1549 } 1550 1551 /* 1552 * Routine used by RCTL syscalls to read in input string. 1553 */ 1554 static int 1555 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) 1556 { 1557 int error; 1558 char *str; 1559 1560 ASSERT_RACCT_ENABLED(); 1561 1562 if (inbuflen <= 0) 1563 return (EINVAL); 1564 if (inbuflen > RCTL_MAX_INBUFSIZE) 1565 return (E2BIG); 1566 1567 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); 1568 error = copyinstr(inbufp, str, inbuflen, NULL); 1569 if (error != 0) { 1570 free(str, M_RCTL); 1571 return (error); 1572 } 1573 1574 *inputstr = str; 1575 1576 return (0); 1577 } 1578 1579 /* 1580 * Routine used by RCTL syscalls to write out output string. 1581 */ 1582 static int 1583 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) 1584 { 1585 int error; 1586 1587 ASSERT_RACCT_ENABLED(); 1588 1589 if (outputsbuf == NULL) 1590 return (0); 1591 1592 sbuf_finish(outputsbuf); 1593 if (outbuflen < sbuf_len(outputsbuf) + 1) { 1594 sbuf_delete(outputsbuf); 1595 return (ERANGE); 1596 } 1597 error = copyout(sbuf_data(outputsbuf), outbufp, 1598 sbuf_len(outputsbuf) + 1); 1599 sbuf_delete(outputsbuf); 1600 return (error); 1601 } 1602 1603 static struct sbuf * 1604 rctl_racct_to_sbuf(struct racct *racct, int sloppy) 1605 { 1606 int i; 1607 int64_t amount; 1608 struct sbuf *sb; 1609 1610 ASSERT_RACCT_ENABLED(); 1611 1612 sb = sbuf_new_auto(); 1613 for (i = 0; i <= RACCT_MAX; i++) { 1614 if (sloppy == 0 && RACCT_IS_SLOPPY(i)) 1615 continue; 1616 amount = racct->r_resources[i]; 1617 if (RACCT_IS_IN_MILLIONS(i)) 1618 amount /= 1000000; 1619 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); 1620 } 1621 sbuf_setpos(sb, sbuf_len(sb) - 1); 1622 return (sb); 1623 } 1624 1625 int 1626 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 1627 { 1628 int error; 1629 char *inputstr; 1630 struct rctl_rule *filter; 1631 struct sbuf *outputsbuf = NULL; 1632 struct proc *p; 1633 struct uidinfo *uip; 1634 struct loginclass *lc; 1635 struct prison_racct *prr; 1636 1637 if (!racct_enable) 1638 return (ENOSYS); 1639 1640 error = priv_check(td, PRIV_RCTL_GET_RACCT); 1641 if (error != 0) 1642 return (error); 1643 1644 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1645 if (error != 0) 1646 return (error); 1647 1648 sx_slock(&allproc_lock); 1649 error = rctl_string_to_rule(inputstr, &filter); 1650 free(inputstr, M_RCTL); 1651 if (error != 0) { 1652 sx_sunlock(&allproc_lock); 1653 return (error); 1654 } 1655 1656 switch (filter->rr_subject_type) { 1657 case RCTL_SUBJECT_TYPE_PROCESS: 1658 p = filter->rr_subject.rs_proc; 1659 if (p == NULL) { 1660 error = EINVAL; 1661 goto out; 1662 } 1663 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); 1664 break; 1665 case RCTL_SUBJECT_TYPE_USER: 1666 uip = filter->rr_subject.rs_uip; 1667 if (uip == NULL) { 1668 error = EINVAL; 1669 goto out; 1670 } 1671 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); 1672 break; 1673 case RCTL_SUBJECT_TYPE_LOGINCLASS: 1674 lc = filter->rr_subject.rs_loginclass; 1675 if (lc == NULL) { 1676 error = EINVAL; 1677 goto out; 1678 } 1679 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); 1680 break; 1681 case RCTL_SUBJECT_TYPE_JAIL: 1682 prr = filter->rr_subject.rs_prison_racct; 1683 if (prr == NULL) { 1684 error = EINVAL; 1685 goto out; 1686 } 1687 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); 1688 break; 1689 default: 1690 error = EINVAL; 1691 } 1692 out: 1693 rctl_rule_release(filter); 1694 sx_sunlock(&allproc_lock); 1695 if (error != 0) 1696 return (error); 1697 1698 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); 1699 1700 return (error); 1701 } 1702 1703 static void 1704 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) 1705 { 1706 struct rctl_rule *filter = (struct rctl_rule *)arg2; 1707 struct rctl_rule_link *link; 1708 struct sbuf *sb = (struct sbuf *)arg3; 1709 1710 ASSERT_RACCT_ENABLED(); 1711 RCTL_LOCK_ASSERT(); 1712 1713 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { 1714 if (!rctl_rule_matches(link->rrl_rule, filter)) 1715 continue; 1716 rctl_rule_to_sbuf(sb, link->rrl_rule); 1717 sbuf_printf(sb, ","); 1718 } 1719 } 1720 1721 int 1722 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 1723 { 1724 int error; 1725 size_t bufsize; 1726 char *inputstr, *buf; 1727 struct sbuf *sb; 1728 struct rctl_rule *filter; 1729 struct rctl_rule_link *link; 1730 struct proc *p; 1731 1732 if (!racct_enable) 1733 return (ENOSYS); 1734 1735 error = priv_check(td, PRIV_RCTL_GET_RULES); 1736 if (error != 0) 1737 return (error); 1738 1739 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1740 if (error != 0) 1741 return (error); 1742 1743 sx_slock(&allproc_lock); 1744 error = rctl_string_to_rule(inputstr, &filter); 1745 free(inputstr, M_RCTL); 1746 if (error != 0) { 1747 sx_sunlock(&allproc_lock); 1748 return (error); 1749 } 1750 1751 bufsize = uap->outbuflen; 1752 if (bufsize > rctl_maxbufsize) { 1753 sx_sunlock(&allproc_lock); 1754 return (E2BIG); 1755 } 1756 1757 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1758 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1759 KASSERT(sb != NULL, ("sbuf_new failed")); 1760 1761 FOREACH_PROC_IN_SYSTEM(p) { 1762 RCTL_RLOCK(); 1763 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1764 /* 1765 * Non-process rules will be added to the buffer later. 1766 * Adding them here would result in duplicated output. 1767 */ 1768 if (link->rrl_rule->rr_subject_type != 1769 RCTL_SUBJECT_TYPE_PROCESS) 1770 continue; 1771 if (!rctl_rule_matches(link->rrl_rule, filter)) 1772 continue; 1773 rctl_rule_to_sbuf(sb, link->rrl_rule); 1774 sbuf_printf(sb, ","); 1775 } 1776 RCTL_RUNLOCK(); 1777 } 1778 1779 loginclass_racct_foreach(rctl_get_rules_callback, 1780 rctl_rule_pre_callback, rctl_rule_post_callback, 1781 filter, sb); 1782 ui_racct_foreach(rctl_get_rules_callback, 1783 rctl_rule_pre_callback, rctl_rule_post_callback, 1784 filter, sb); 1785 prison_racct_foreach(rctl_get_rules_callback, 1786 rctl_rule_pre_callback, rctl_rule_post_callback, 1787 filter, sb); 1788 if (sbuf_error(sb) == ENOMEM) { 1789 error = ERANGE; 1790 goto out; 1791 } 1792 1793 /* 1794 * Remove trailing ",". 1795 */ 1796 if (sbuf_len(sb) > 0) 1797 sbuf_setpos(sb, sbuf_len(sb) - 1); 1798 1799 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1800 out: 1801 rctl_rule_release(filter); 1802 sx_sunlock(&allproc_lock); 1803 free(buf, M_RCTL); 1804 return (error); 1805 } 1806 1807 int 1808 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 1809 { 1810 int error; 1811 size_t bufsize; 1812 char *inputstr, *buf; 1813 struct sbuf *sb; 1814 struct rctl_rule *filter; 1815 struct rctl_rule_link *link; 1816 1817 if (!racct_enable) 1818 return (ENOSYS); 1819 1820 error = priv_check(td, PRIV_RCTL_GET_LIMITS); 1821 if (error != 0) 1822 return (error); 1823 1824 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1825 if (error != 0) 1826 return (error); 1827 1828 sx_slock(&allproc_lock); 1829 error = rctl_string_to_rule(inputstr, &filter); 1830 free(inputstr, M_RCTL); 1831 if (error != 0) { 1832 sx_sunlock(&allproc_lock); 1833 return (error); 1834 } 1835 1836 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { 1837 rctl_rule_release(filter); 1838 sx_sunlock(&allproc_lock); 1839 return (EINVAL); 1840 } 1841 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { 1842 rctl_rule_release(filter); 1843 sx_sunlock(&allproc_lock); 1844 return (EOPNOTSUPP); 1845 } 1846 if (filter->rr_subject.rs_proc == NULL) { 1847 rctl_rule_release(filter); 1848 sx_sunlock(&allproc_lock); 1849 return (EINVAL); 1850 } 1851 1852 bufsize = uap->outbuflen; 1853 if (bufsize > rctl_maxbufsize) { 1854 rctl_rule_release(filter); 1855 sx_sunlock(&allproc_lock); 1856 return (E2BIG); 1857 } 1858 1859 buf = malloc(bufsize, M_RCTL, M_WAITOK); 1860 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); 1861 KASSERT(sb != NULL, ("sbuf_new failed")); 1862 1863 RCTL_RLOCK(); 1864 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, 1865 rrl_next) { 1866 rctl_rule_to_sbuf(sb, link->rrl_rule); 1867 sbuf_printf(sb, ","); 1868 } 1869 RCTL_RUNLOCK(); 1870 if (sbuf_error(sb) == ENOMEM) { 1871 error = ERANGE; 1872 goto out; 1873 } 1874 1875 /* 1876 * Remove trailing ",". 1877 */ 1878 if (sbuf_len(sb) > 0) 1879 sbuf_setpos(sb, sbuf_len(sb) - 1); 1880 1881 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); 1882 out: 1883 rctl_rule_release(filter); 1884 sx_sunlock(&allproc_lock); 1885 free(buf, M_RCTL); 1886 return (error); 1887 } 1888 1889 int 1890 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 1891 { 1892 int error; 1893 struct rctl_rule *rule; 1894 char *inputstr; 1895 1896 if (!racct_enable) 1897 return (ENOSYS); 1898 1899 error = priv_check(td, PRIV_RCTL_ADD_RULE); 1900 if (error != 0) 1901 return (error); 1902 1903 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1904 if (error != 0) 1905 return (error); 1906 1907 sx_slock(&allproc_lock); 1908 error = rctl_string_to_rule(inputstr, &rule); 1909 free(inputstr, M_RCTL); 1910 if (error != 0) { 1911 sx_sunlock(&allproc_lock); 1912 return (error); 1913 } 1914 /* 1915 * The 'per' part of a rule is optional. 1916 */ 1917 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && 1918 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) 1919 rule->rr_per = rule->rr_subject_type; 1920 1921 if (!rctl_rule_fully_specified(rule)) { 1922 error = EINVAL; 1923 goto out; 1924 } 1925 1926 error = rctl_rule_add(rule); 1927 1928 out: 1929 rctl_rule_release(rule); 1930 sx_sunlock(&allproc_lock); 1931 return (error); 1932 } 1933 1934 int 1935 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 1936 { 1937 int error; 1938 struct rctl_rule *filter; 1939 char *inputstr; 1940 1941 if (!racct_enable) 1942 return (ENOSYS); 1943 1944 error = priv_check(td, PRIV_RCTL_REMOVE_RULE); 1945 if (error != 0) 1946 return (error); 1947 1948 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); 1949 if (error != 0) 1950 return (error); 1951 1952 sx_slock(&allproc_lock); 1953 error = rctl_string_to_rule(inputstr, &filter); 1954 free(inputstr, M_RCTL); 1955 if (error != 0) { 1956 sx_sunlock(&allproc_lock); 1957 return (error); 1958 } 1959 1960 error = rctl_rule_remove(filter); 1961 rctl_rule_release(filter); 1962 sx_sunlock(&allproc_lock); 1963 1964 return (error); 1965 } 1966 1967 /* 1968 * Update RCTL rule list after credential change. 1969 */ 1970 void 1971 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) 1972 { 1973 int rulecnt, i; 1974 struct rctl_rule_link *link, *newlink; 1975 struct uidinfo *newuip; 1976 struct loginclass *newlc; 1977 struct prison_racct *newprr; 1978 LIST_HEAD(, rctl_rule_link) newrules; 1979 1980 ASSERT_RACCT_ENABLED(); 1981 1982 newuip = newcred->cr_ruidinfo; 1983 newlc = newcred->cr_loginclass; 1984 newprr = newcred->cr_prison->pr_prison_racct; 1985 1986 LIST_INIT(&newrules); 1987 1988 again: 1989 /* 1990 * First, count the rules that apply to the process with new 1991 * credentials. 1992 */ 1993 rulecnt = 0; 1994 RCTL_RLOCK(); 1995 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 1996 if (link->rrl_rule->rr_subject_type == 1997 RCTL_SUBJECT_TYPE_PROCESS) 1998 rulecnt++; 1999 } 2000 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) 2001 rulecnt++; 2002 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) 2003 rulecnt++; 2004 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) 2005 rulecnt++; 2006 RCTL_RUNLOCK(); 2007 2008 /* 2009 * Create temporary list. We've dropped the rctl_lock in order 2010 * to use M_WAITOK. 2011 */ 2012 for (i = 0; i < rulecnt; i++) { 2013 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); 2014 newlink->rrl_rule = NULL; 2015 newlink->rrl_exceeded = 0; 2016 LIST_INSERT_HEAD(&newrules, newlink, rrl_next); 2017 } 2018 2019 newlink = LIST_FIRST(&newrules); 2020 2021 /* 2022 * Assign rules to the newly allocated list entries. 2023 */ 2024 RCTL_WLOCK(); 2025 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { 2026 if (link->rrl_rule->rr_subject_type == 2027 RCTL_SUBJECT_TYPE_PROCESS) { 2028 if (newlink == NULL) 2029 goto goaround; 2030 rctl_rule_acquire(link->rrl_rule); 2031 newlink->rrl_rule = link->rrl_rule; 2032 newlink->rrl_exceeded = link->rrl_exceeded; 2033 newlink = LIST_NEXT(newlink, rrl_next); 2034 rulecnt--; 2035 } 2036 } 2037 2038 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { 2039 if (newlink == NULL) 2040 goto goaround; 2041 rctl_rule_acquire(link->rrl_rule); 2042 newlink->rrl_rule = link->rrl_rule; 2043 newlink->rrl_exceeded = link->rrl_exceeded; 2044 newlink = LIST_NEXT(newlink, rrl_next); 2045 rulecnt--; 2046 } 2047 2048 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { 2049 if (newlink == NULL) 2050 goto goaround; 2051 rctl_rule_acquire(link->rrl_rule); 2052 newlink->rrl_rule = link->rrl_rule; 2053 newlink->rrl_exceeded = link->rrl_exceeded; 2054 newlink = LIST_NEXT(newlink, rrl_next); 2055 rulecnt--; 2056 } 2057 2058 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { 2059 if (newlink == NULL) 2060 goto goaround; 2061 rctl_rule_acquire(link->rrl_rule); 2062 newlink->rrl_rule = link->rrl_rule; 2063 newlink->rrl_exceeded = link->rrl_exceeded; 2064 newlink = LIST_NEXT(newlink, rrl_next); 2065 rulecnt--; 2066 } 2067 2068 if (rulecnt == 0) { 2069 /* 2070 * Free the old rule list. 2071 */ 2072 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { 2073 link = LIST_FIRST(&p->p_racct->r_rule_links); 2074 LIST_REMOVE(link, rrl_next); 2075 rctl_rule_release(link->rrl_rule); 2076 uma_zfree(rctl_rule_link_zone, link); 2077 } 2078 2079 /* 2080 * Replace lists and we're done. 2081 * 2082 * XXX: Is there any way to switch list heads instead 2083 * of iterating here? 2084 */ 2085 while (!LIST_EMPTY(&newrules)) { 2086 newlink = LIST_FIRST(&newrules); 2087 LIST_REMOVE(newlink, rrl_next); 2088 LIST_INSERT_HEAD(&p->p_racct->r_rule_links, 2089 newlink, rrl_next); 2090 } 2091 2092 RCTL_WUNLOCK(); 2093 2094 return; 2095 } 2096 2097 goaround: 2098 RCTL_WUNLOCK(); 2099 2100 /* 2101 * Rule list changed while we were not holding the rctl_lock. 2102 * Free the new list and try again. 2103 */ 2104 while (!LIST_EMPTY(&newrules)) { 2105 newlink = LIST_FIRST(&newrules); 2106 LIST_REMOVE(newlink, rrl_next); 2107 if (newlink->rrl_rule != NULL) 2108 rctl_rule_release(newlink->rrl_rule); 2109 uma_zfree(rctl_rule_link_zone, newlink); 2110 } 2111 2112 goto again; 2113 } 2114 2115 /* 2116 * Assign RCTL rules to the newly created process. 2117 */ 2118 int 2119 rctl_proc_fork(struct proc *parent, struct proc *child) 2120 { 2121 int error; 2122 struct rctl_rule_link *link; 2123 struct rctl_rule *rule; 2124 2125 LIST_INIT(&child->p_racct->r_rule_links); 2126 2127 ASSERT_RACCT_ENABLED(); 2128 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); 2129 2130 RCTL_WLOCK(); 2131 2132 /* 2133 * Go through limits applicable to the parent and assign them 2134 * to the child. Rules with 'process' subject have to be duplicated 2135 * in order to make their rr_subject point to the new process. 2136 */ 2137 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { 2138 if (link->rrl_rule->rr_subject_type == 2139 RCTL_SUBJECT_TYPE_PROCESS) { 2140 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); 2141 if (rule == NULL) 2142 goto fail; 2143 KASSERT(rule->rr_subject.rs_proc == parent, 2144 ("rule->rr_subject.rs_proc != parent")); 2145 rule->rr_subject.rs_proc = child; 2146 error = rctl_racct_add_rule_locked(child->p_racct, 2147 rule); 2148 rctl_rule_release(rule); 2149 if (error != 0) 2150 goto fail; 2151 } else { 2152 error = rctl_racct_add_rule_locked(child->p_racct, 2153 link->rrl_rule); 2154 if (error != 0) 2155 goto fail; 2156 } 2157 } 2158 2159 RCTL_WUNLOCK(); 2160 return (0); 2161 2162 fail: 2163 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { 2164 link = LIST_FIRST(&child->p_racct->r_rule_links); 2165 LIST_REMOVE(link, rrl_next); 2166 rctl_rule_release(link->rrl_rule); 2167 uma_zfree(rctl_rule_link_zone, link); 2168 } 2169 RCTL_WUNLOCK(); 2170 return (EAGAIN); 2171 } 2172 2173 /* 2174 * Release rules attached to the racct. 2175 */ 2176 void 2177 rctl_racct_release(struct racct *racct) 2178 { 2179 struct rctl_rule_link *link; 2180 2181 ASSERT_RACCT_ENABLED(); 2182 2183 RCTL_WLOCK(); 2184 while (!LIST_EMPTY(&racct->r_rule_links)) { 2185 link = LIST_FIRST(&racct->r_rule_links); 2186 LIST_REMOVE(link, rrl_next); 2187 rctl_rule_release(link->rrl_rule); 2188 uma_zfree(rctl_rule_link_zone, link); 2189 } 2190 RCTL_WUNLOCK(); 2191 } 2192 2193 static void 2194 rctl_init(void) 2195 { 2196 2197 if (!racct_enable) 2198 return; 2199 2200 rctl_rule_link_zone = uma_zcreate("rctl_rule_link", 2201 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, 2202 UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 2203 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), 2204 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 2205 2206 /* 2207 * Set default values, making sure not to overwrite the ones 2208 * fetched from tunables. Most of those could be set at the 2209 * declaration, except for the rctl_throttle_max - we cannot 2210 * set it there due to hz not being compile time constant. 2211 */ 2212 if (rctl_throttle_min < 1) 2213 rctl_throttle_min = 1; 2214 if (rctl_throttle_max < rctl_throttle_min) 2215 rctl_throttle_max = 2 * hz; 2216 if (rctl_throttle_pct < 0) 2217 rctl_throttle_pct = 100; 2218 if (rctl_throttle_pct2 < 0) 2219 rctl_throttle_pct2 = 100; 2220 } 2221 2222 #else /* !RCTL */ 2223 2224 int 2225 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) 2226 { 2227 2228 return (ENOSYS); 2229 } 2230 2231 int 2232 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) 2233 { 2234 2235 return (ENOSYS); 2236 } 2237 2238 int 2239 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) 2240 { 2241 2242 return (ENOSYS); 2243 } 2244 2245 int 2246 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) 2247 { 2248 2249 return (ENOSYS); 2250 } 2251 2252 int 2253 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) 2254 { 2255 2256 return (ENOSYS); 2257 } 2258 2259 #endif /* !RCTL */ 2260