/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>


static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);

/*
 * Resource controls and accounting.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			/* Do not bother to check PRS_NEW processes */
			if (p->p_state == PRS_NEW)
				continue;
			PROC_LOCK(p);
			if (!p_cansee(td, p) &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
int
setpriority(td, uap)
	struct thread *td;
	struct setpriority_args *uap;
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	PROC_SLOCK(p);
	sched_nice(p, n);
	PROC_SUNLOCK(p);
	return (0);
}

/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	int		function;
	lwpid_t		lwpid;
	struct rtprio	*rtp;
};
#endif
int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *curp;
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	/*
	 * Though lwpid is unique, only current process is supported
	 * since there is no efficient way to look up a LWP yet.
	 */
	p = curp;
	PROC_LOCK(p);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		PROC_SLOCK(p);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			pri_to_rtp(td1, &rtp);
		else
			error = ESRCH;
		PROC_SUNLOCK(p);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		if (priv_check(td, PRIV_SCHED_RTPRIO) != 0) {
			/* can't set realtime priority */
			/*
			 * Realtime priority has to be restricted for reasons which should be
			 * obvious. However, for idle priority, there is a potential for
			 * system deadlock if an idleprio process gains a lock on a resource
			 * that other processes need (and the idleprio process can't run
			 * due to a CPU-bound normal process). Fix me! XXX
			 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
			if (rtp.type != RTP_PRIO_NORMAL) {
#endif
				error = EPERM;
				break;
			}
		}

		PROC_SLOCK(p);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			error = rtp_to_pri(&rtp, td1);
		else
			error = ESRCH;
		PROC_SUNLOCK(p);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif
int
rtprio(td, uap)
	struct thread *td;		/* curthread */
	register struct rtprio_args *uap;
{
	struct proc *curp;
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	if (uap->pid == 0) {
		p = curp;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		PROC_SLOCK(p);
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * XXXKSE: maybe need a new interface to report
		 * priorities of multiple system scope threads.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		PROC_SUNLOCK(p);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		if (priv_check(td, PRIV_SCHED_RTPRIO) != 0) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
			/*
			 * Realtime priority has to be restricted for reasons which should be
			 * obvious. However, for idle priority, there is a potential for
			 * system deadlock if an idleprio process gains a lock on a resource
			 * that other processes need (and the idleprio process can't run
			 * due to a CPU-bound normal process). Fix me! XXX
			 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
			if (rtp.type != RTP_PRIO_NORMAL) {
#endif
				error = EPERM;
				break;
			}
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process.  If we
		 * specify our own pid we do the latter.
		 */
		PROC_SLOCK(p);
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		PROC_SUNLOCK(p);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Convert an rtprio request into an effective kernel priority and
 * apply it to the given thread.
 */
int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char newpri;

	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	thread_lock(td);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		thread_unlock(td);
		return (EINVAL);
	}
	sched_class(td, rtp->type);	/* XXX fix */
	sched_user_prio(td, newpri);
	if (curthread == td)
		sched_prio(curthread, td->td_user_pri);	/* XXX dubious */
	thread_unlock(td);
	return (0);
}

/*
 * Report a thread's current base user priority in rtprio form.
 */
void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	thread_lock(td);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
	thread_unlock(td);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

static void
lim_cb(void *arg)
{
	struct rlimit rlim;
	struct thread *td;
	struct proc *p;

	p = arg;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
		return;
	PROC_SLOCK(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		ruxagg(&p->p_rux, td);
		thread_unlock(td);
	}
	PROC_SUNLOCK(p);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			if (p->p_cpulimit < rlim.rlim_max)
				p->p_cpulimit += 5;
			psignal(p, SIGXCPU);
		}
	}
	callout_reset(&p->p_limco, hz, lim_cb, p);
}

int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	rlim_t oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check_cred(td->td_ucred,
		    PRIV_PROC_SETRLIMIT, SUSER_ALLOWJAIL))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset(&p->p_limco, hz, lim_cb, p);
		PROC_SLOCK(p);
		p->p_cpulimit = limp->rlim_cur;
		PROC_SUNLOCK(p);
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = alimp->rlim_cur;
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	if (td->td_proc->p_sysent->sv_fixlimit != NULL)
		td->td_proc->p_sysent->sv_fixlimit(limp, which);
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack - oldssiz;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(p, up, sp)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct rusage_ext rux;
	struct thread *td;
	uint64_t u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK(p);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	td = curthread;
	if (td->td_proc == p) {
		u = cpu_ticks();
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);
	}
	/* Work on a copy of p_rux so we can let go of p_slock */
	rux = p->p_rux;
	PROC_SUNLOCK(p);
	calcru1(p, &rux, up, sp);
	/* Update the result from the p_rux copy */
	p->p_rux.rux_uu = rux.rux_uu;
	p->p_rux.rux_su = rux.rux_su;
	p->p_rux.rux_tu = rux.rux_tu;
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	u_int64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(td, who, rup)
	struct thread *td;
	int who;
	struct rusage *rup;
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {

	case RUSAGE_SELF:
		rufetch(p, rup);
		calcru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	default:
		PROC_UNLOCK(p);
		return (EINVAL);
	}
	PROC_UNLOCK(p);
	return (0);
}

/*
 * Add the resource usage counters of ru2 into ru, keeping the larger
 * of the two maximum resident set sizes.
 */
void
rucollect(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Aggregate both the rusage counters and the extended tick/runtime
 * statistics of (ru2, rux2) into (ru, rux).
 */
void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	rucollect(ru, ru2);
}

/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
void
ruxagg(struct rusage_ext *rux, struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_runtime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
	td->td_runtime = 0;
	td->td_uticks = 0;
	td->td_iticks = 0;
	td->td_sticks = 0;
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
	struct thread *td;

	memset(ru, 0, sizeof(*ru));
	PROC_SLOCK(p);
	if (p->p_ru == NULL) {
		KASSERT(p->p_numthreads > 0,
		    ("rufetch: No threads or ru in proc %p", p));
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			ruxagg(&p->p_rux, td);
			thread_unlock(td);
			rucollect(ru, &td->td_ru);
		}
	} else
		*ru = *p->p_ru;
	PROC_SUNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

void
lim_fork(struct proc *p1, struct proc *p2)
{
	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset(&p2->p_limco, hz, lim_cb, p2);
}

void
lim_free(limp)
	struct plimit *limp;
{

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	UIDINFO_LOCK(uip);
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	UIDINFO_LOCK(uip);

	if (--uip->ui_ref != 0) {
		UIDINFO_UNLOCK(uip);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
	mtx_lock(&uihashtbl_mtx);
	UIDINFO_LOCK(uip);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		UIDINFO_UNLOCK(uip);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	UIDINFO_UNLOCK(uip);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct uidinfo	*uip;
	int	diff;
	int	max;
{

	UIDINFO_LOCK(uip);
	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	UIDINFO_UNLOCK(uip);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct uidinfo	*uip;
	u_int	*hiwat;
	u_int	to;
	rlim_t	max;
{
	rlim_t new;

	UIDINFO_LOCK(uip);
	new = uip->ui_sbsize + to - *hiwat;
	/* Don't allow them to exceed max, but allow subtraction. */
	if (to > *hiwat && new > max) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_sbsize = new;
	UIDINFO_UNLOCK(uip);
	*hiwat = to;
	if (new < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	return (1);
}