/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>


static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);

/*
 * Resource controls and accounting.
 */
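/*
 * getpriority(2): report the most favourable (numerically lowest) nice
 * value among the processes selected by "which"/"who".  A "who" of 0
 * refers to the calling process, its process group, or its user,
 * respectively.  If no matching process is visible to the caller,
 * ESRCH is returned.
 */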
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			/* Do not bother to check PRS_NEW processes */
			if (p->p_state == PRS_NEW)
				continue;
			PROC_LOCK(p);
			if (!p_cansee(td, p) &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
int
setpriority(td, uap)
	struct thread *td;
	struct setpriority_args *uap;
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == 0)
				break;
			if (p_cansee(td, p) == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	PROC_SLOCK(p);
	sched_nice(p, n);
	PROC_SUNLOCK(p);
	return (0);
}

/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	int		function;
	lwpid_t		lwpid;
	struct rtprio	*rtp;
};
#endif
int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *curp;
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	/*
	 * Though lwpid is unique, only current process is supported
	 * since there is no efficient way to look up a LWP yet.
	 */
	p = curp;
	PROC_LOCK(p);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		PROC_SLOCK(p);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			pri_to_rtp(td1, &rtp);
		else
			error = ESRCH;
		PROC_SUNLOCK(p);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which should be
		 * obvious.  However, for idle priority, there is a potential for
		 * system deadlock if an idleprio process gains a lock on a resource
		 * that other processes need (and the idleprio process can't run
		 * due to a CPU-bound normal process).  Fix me!  XXX
		 */
#if 0
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
		if (rtp.type != RTP_PRIO_NORMAL) {
#endif
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		PROC_SLOCK(p);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			error = rtp_to_pri(&rtp, td1);
		else
			error = ESRCH;
		PROC_SUNLOCK(p);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif
int
rtprio(td, uap)
	struct thread *td;		/* curthread */
	register struct rtprio_args *uap;
{
	struct proc *curp;
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	if (uap->pid == 0) {
		p = curp;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		PROC_SLOCK(p);
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * XXXKSE: maybe need a new interface to report
		 * priorities of multiple system scope threads.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		PROC_SUNLOCK(p);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which should be
		 * obvious.  However, for idle priority, there is a potential for
		 * system deadlock if an idleprio process gains a lock on a resource
		 * that other processes need (and the idleprio process can't run
		 * due to a CPU-bound normal process).  Fix me!  XXX
		 */
#if 0
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
		if (rtp.type != RTP_PRIO_NORMAL) {
#endif
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process.  If we
		 * specify our own pid we do the latter.
		 */
		PROC_SLOCK(p);
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		PROC_SUNLOCK(p);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char	newpri;
	u_char	oldpri;

	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	thread_lock(td);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		thread_unlock(td);
		return (EINVAL);
	}
	sched_class(td, rtp->type);	/* XXX fix */
	oldpri = td->td_user_pri;
	sched_user_prio(td, newpri);
	if (curthread == td)
		sched_prio(curthread, td->td_user_pri); /* XXX dubious */
	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
		thread_unlock(td);
		umtx_pi_adjust(td, oldpri);
	} else
		thread_unlock(td);
	return (0);
}

void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	thread_lock(td);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
	thread_unlock(td);
}
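/*
 * Compatibility wrappers for the old 4.3BSD setrlimit()/getrlimit()
 * interface, which traffics in the narrower struct orlimit; limit values
 * that do not fit are clamped to the largest representable (signed 32-bit)
 * value rather than reported as an overflow.
 */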
#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ?
	    0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

static void
lim_cb(void *arg)
{
	struct rlimit rlim;
	struct thread *td;
	struct proc *p;

	p = arg;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
		return;
	PROC_SLOCK(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		ruxagg(&p->p_rux, td);
		thread_unlock(td);
	}
	PROC_SUNLOCK(p);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			if (p->p_cpulimit < rlim.rlim_max)
				p->p_cpulimit += 5;
			psignal(p, SIGXCPU);
		}
	}
	callout_reset(&p->p_limco, hz, lim_cb, p);
}
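/*
 * Common back end for setrlimit(2) and its compat variants: install a new
 * soft/hard limit pair for "which" in the current process.  The request is
 * clamped to the system-wide maxima and raising a value above the current
 * hard limit requires privilege.
 */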
int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	struct rlimit oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz.rlim_cur = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset(&p->p_limco, hz, lim_cb, p);
		PROC_SLOCK(p);
		p->p_cpulimit = limp->rlim_cur;
		PROC_SUNLOCK(p);
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = *alimp;
		if (td->td_proc->p_sysent->sv_fixlimit != NULL)
			td->td_proc->p_sysent->sv_fixlimit(&oldssiz,
			    RLIMIT_STACK);
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	if (td->td_proc->p_sysent->sv_fixlimit != NULL)
		td->td_proc->p_sysent->sv_fixlimit(limp, which);
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz.rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz.rlim_cur) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz.rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz.rlim_cur - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    oldssiz.rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(p, up, sp)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct thread *td;
	uint64_t u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	td = curthread;
	if (td->td_proc == p) {
		u = cpu_ticks();
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);
	}
	/* Make sure the per-thread stats are current. */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_incruntime == 0)
			continue;
		thread_lock(td);
		ruxagg(&p->p_rux, td);
		thread_unlock(td);
	}
	calcru1(p, &p->p_rux, up, sp);
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	u_int64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What likely happened here is that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(td, who, rup)
	struct thread *td;
	int who;
	struct rusage *rup;
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {

	case RUSAGE_SELF:
		rufetchcalc(p, rup, &rup->ru_utime,
		    &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	default:
		PROC_UNLOCK(p);
		return (EINVAL);
	}
	PROC_UNLOCK(p);
	return (0);
}

void
rucollect(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	rucollect(ru, ru2);
}

/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
void
ruxagg(struct rusage_ext *rux, struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_incruntime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
	td->td_incruntime = 0;
	td->td_uticks = 0;
	td->td_iticks = 0;
	td->td_sticks = 0;
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
	struct thread *td;

	PROC_SLOCK_ASSERT(p, MA_OWNED);

	*ru = p->p_ru;
	if (p->p_numthreads > 0)  {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			ruxagg(&p->p_rux, td);
			thread_unlock(td);
			rucollect(ru, &td->td_ru);
		}
	}
}

/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume that calcru() is executed only after the
 * rufetch has completed.
 */
void
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
    struct timeval *sp)
{

	PROC_SLOCK(p);
	rufetch(p, ru);
	calcru(p, up, sp);
	PROC_SUNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

void
lim_fork(struct proc *p1, struct proc *p2)
{
	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset(&p2->p_limco, hz, lim_cb, p2);
}

void
lim_free(limp)
	struct plimit *limp;
{

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	UIDINFO_LOCK(uip);
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	UIDINFO_LOCK(uip);

	if (--uip->ui_ref != 0) {
		UIDINFO_UNLOCK(uip);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
	mtx_lock(&uihashtbl_mtx);
	UIDINFO_LOCK(uip);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped
	 * the initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		UIDINFO_UNLOCK(uip);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	UIDINFO_UNLOCK(uip);
}

/*
 * Change the count associated with the number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	UIDINFO_LOCK(uip);
	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	UIDINFO_UNLOCK(uip);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_int	*hiwat;
	u_int	to;
	rlim_t	max;
{
	rlim_t new;

	UIDINFO_LOCK(uip);
	new = uip->ui_sbsize + to - *hiwat;
	/* Don't allow them to exceed max, but allow subtraction. */
	if (to > *hiwat && new > max) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_sbsize = new;
	UIDINFO_UNLOCK(uip);
	*hiwat = to;
	if (new < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	return (1);
}
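/*
 * Caller pattern (illustrative sketch only, not part of this file): a
 * consumer such as fork1() would typically charge a new process against
 * the owner's uidinfo and back the charge out again on failure, roughly:
 *
 *	if (!chgproccnt(uip, 1, lim_cur(p1, RLIMIT_NPROC)))
 *		return (EAGAIN);
 *	...
 *	if (some later failure)
 *		(void)chgproccnt(uip, -1, 0);
 *
 * A return value of 0 means the requested change would exceed "max" and
 * nothing was charged; 1 means the count was updated.
 */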