/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>


static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
static void	ruxagg_locked(struct rusage_ext *rux, struct thread *td);

/*
 * Resource controls and accounting.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p_cansee(td, p) == 0) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			/* Do not bother to check PRS_NEW processes */
			if (p->p_state == PRS_NEW)
				continue;
			PROC_LOCK(p);
			if (p_cansee(td, p) == 0 &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
int
setpriority(td, uap)
	struct thread *td;
	struct setpriority_args *uap;
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			error = p_cansee(td, p);
			if (error == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	sched_nice(p, n);
	return (0);
}

/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	int		function;
	lwpid_t		lwpid;
	struct rtprio	*rtp;
};
#endif
int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
		p = td->td_proc;
		td1 = td;
		PROC_LOCK(p);
	} else {
		/* Only look up thread in current process */
		td1 = tdfind(uap->lwpid, curproc->p_pid);
		if (td1 == NULL)
			return (ESRCH);
		p = td1->td_proc;
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		pri_to_rtp(td1, &rtp);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which should be
		 * obvious.  However, for idle priority, there is a potential for
		 * system deadlock if an idleprio process gains a lock on a resource
		 * that other processes need (and the idleprio process can't run
		 * due to a CPU-bound normal process).  Fix me!  XXX
		 */
#if 0
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
		if (rtp.type != RTP_PRIO_NORMAL) {
#endif
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}
		error = rtp_to_pri(&rtp, td1);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif
int
rtprio(td, uap)
	struct thread *td;		/* curthread */
	register struct rtprio_args *uap;
{
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	if (uap->pid == 0) {
		p = td->td_proc;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which should be
		 * obvious.  However, for idle priority, there is a potential for
		 * system deadlock if an idleprio process gains a lock on a resource
		 * that other processes need (and the idleprio process can't run
		 * due to a CPU-bound normal process).  Fix me!  XXX
		 */
#if 0
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
		if (rtp.type != RTP_PRIO_NORMAL) {
#endif
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process.  If we
		 * specify our own pid we do the latter.
		 */
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char	newpri;
	u_char	oldpri;

	thread_lock(td);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		if (rtp->prio > RTP_PRIO_MAX) {
			thread_unlock(td);
			return (EINVAL);
		}
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
			thread_unlock(td);
			return (EINVAL);
		}
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		thread_unlock(td);
		return (EINVAL);
	}
	sched_class(td, rtp->type);	/* XXX fix */
	oldpri = td->td_user_pri;
	sched_user_prio(td, newpri);
	if (curthread == td)
		sched_prio(curthread, td->td_user_pri); /* XXX dubious */
	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
		thread_unlock(td);
		umtx_pi_adjust(td, oldpri);
	} else
		thread_unlock(td);
	return (0);
}

void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	thread_lock(td);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
	thread_unlock(td);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim,
	    sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

static void
lim_cb(void *arg)
{
	struct rlimit rlim;
	struct thread *td;
	struct proc *p;

	p = arg;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
		return;
	PROC_SLOCK(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		ruxagg(p, td);
	}
	PROC_SUNLOCK(p);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			if (p->p_cpulimit < rlim.rlim_max)
				p->p_cpulimit += 5;
			psignal(p, SIGXCPU);
		}
	}
	if ((p->p_flag & P_WEXIT) == 0)
		callout_reset(&p->p_limco, hz, lim_cb, p);
}

int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	struct rlimit oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz.rlim_cur = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset(&p->p_limco, hz, lim_cb, p);
		p->p_cpulimit = limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = *alimp;
		if (p->p_sysent->sv_fixlimit != NULL)
			p->p_sysent->sv_fixlimit(&oldssiz,
			    RLIMIT_STACK);
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(limp, which);
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz.rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz.rlim_cur) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz.rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz.rlim_cur - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    oldssiz.rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(p, up, sp)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct thread *td;
	uint64_t u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	td = curthread;
	if (td->td_proc == p) {
		u = cpu_ticks();
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);
	}
	/* Make sure the per-thread stats are current. */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_incruntime == 0)
			continue;
		ruxagg(p, td);
	}
	calcru1(p, &p->p_rux, up, sp);
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	uint64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(struct thread *td, int who, struct rusage *rup)
{
	struct proc *p;
	int error;

	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {
	case RUSAGE_SELF:
		rufetchcalc(p, rup, &rup->ru_utime,
		    &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	case RUSAGE_THREAD:
		PROC_SLOCK(p);
		ruxagg(p, td);
		PROC_SUNLOCK(p);
		thread_lock(td);
		*rup = td->td_ru;
		calcru1(p, &td->td_rux, &rup->ru_utime, &rup->ru_stime);
		thread_unlock(td);
		break;

	default:
		error = EINVAL;
	}
	PROC_UNLOCK(p);
	return (error);
}

void
rucollect(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	rucollect(ru, ru2);
}

/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
static void
ruxagg_locked(struct rusage_ext *rux, struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_incruntime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
}

void
ruxagg(struct proc *p, struct thread *td)
{

	thread_lock(td);
	ruxagg_locked(&p->p_rux, td);
	ruxagg_locked(&td->td_rux, td);
	td->td_incruntime = 0;
	td->td_uticks = 0;
	td->td_iticks = 0;
	td->td_sticks = 0;
	thread_unlock(td);
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
	struct thread *td;

	PROC_SLOCK_ASSERT(p, MA_OWNED);

	*ru = p->p_ru;
	if (p->p_numthreads > 0) {
		FOREACH_THREAD_IN_PROC(p, td) {
			ruxagg(p, td);
			rucollect(ru, &td->td_ru);
		}
	}
}

/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume that calcru is executed only after
 * the rufetch has completed.
 */
void
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
    struct timeval *sp)
{

	PROC_SLOCK(p);
	rufetch(p, ru);
	calcru(p, up, sp);
	PROC_SUNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

void
lim_fork(struct proc *p1, struct proc *p2)
{
	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset(&p2->p_limco, hz, lim_cb, p2);
}

void
lim_free(limp)
	struct plimit *limp;
{

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	rw_init(&uihashtbl_lock, "uidinfo hash");
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_lock must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	rw_assert(&uihashtbl_lock, RA_LOCKED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	rw_rlock(&uihashtbl_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		rw_runlock(&uihashtbl_lock);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		rw_wlock(&uihashtbl_lock);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			refcount_init(&uip->ui_ref, 0);
			uip->ui_uid = uid;
			mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL,
			    MTX_DEF);
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	rw_unlock(&uihashtbl_lock);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	refcount_acquire(&uip->ui_ref);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{
	int old;

	/* Prepare for optimal case. */
	old = uip->ui_ref;
	if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
		return;

	/* Prepare for suboptimal case. */
	rw_wlock(&uihashtbl_lock);
	if (refcount_release(&uip->ui_ref)) {
		LIST_REMOVE(uip, ui_hash);
		rw_wunlock(&uihashtbl_lock);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		if (uip->ui_vmsize != 0)
			printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
			    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
		mtx_destroy(&uip->ui_vmsize_mtx);
		free(uip, M_UIDINFO);
		return;
	}
	/*
	 * Someone added a reference between atomic_cmpset_int() and
	 * rw_wlock(&uihashtbl_lock).
	 */
	rw_wunlock(&uihashtbl_lock);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	rlim_t	max;
{

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_proccnt, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_proccnt, (long)diff);
		if (uip->ui_proccnt < 0)
			printf("negative proccnt for uid = %d\n", uip->ui_uid);
	}
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_int	*hiwat;
	u_int	to;
	rlim_t	max;
{
	int diff;

	diff = to - *hiwat;
	if (diff > 0) {
		if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_sbsize, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_sbsize, (long)diff);
		if (uip->ui_sbsize < 0)
			printf("negative sbsize for uid = %d\n", uip->ui_uid);
	}
	*hiwat = to;
	return (1);
}

/*
 * Change the count associated with number of pseudo-terminals
 * a given user is using.  When 'max' is 0, don't enforce a limit
 */
int
chgptscnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	rlim_t	max;
{

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_ptscnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_ptscnt, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_ptscnt, (long)diff);
		if (uip->ui_ptscnt < 0)
			printf("negative ptscnt for uid = %d\n", uip->ui_uid);
	}
	return (1);
}