1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/atomic.h> 29 #include <sys/cmn_err.h> 30 #include <sys/id_space.h> 31 #include <sys/kmem.h> 32 #include <sys/log.h> 33 #include <sys/modctl.h> 34 #include <sys/modhash.h> 35 #include <sys/mutex.h> 36 #include <sys/proc.h> 37 #include <sys/procset.h> 38 #include <sys/project.h> 39 #include <sys/resource.h> 40 #include <sys/rctl.h> 41 #include <sys/siginfo.h> 42 #include <sys/strlog.h> 43 #include <sys/systm.h> 44 #include <sys/task.h> 45 #include <sys/types.h> 46 #include <sys/policy.h> 47 #include <sys/zone.h> 48 49 /* 50 * Resource controls (rctls) 51 * 52 * The rctl subsystem provides a mechanism for kernel components to 53 * register their individual resource controls with the system as a whole, 54 * such that those controls can subscribe to specific actions while being 55 * associated with the various process-model entities provided by the kernel: 56 * the process, the task, the project, and the zone. (In principle, only 57 * minor modifications would be required to connect the resource control 58 * functionality to non-process-model entities associated with the system.) 59 * 60 * Subsystems register their rctls via rctl_register(). Subsystems 61 * also wishing to provide additional limits on a given rctl can modify 62 * them once they have the rctl handle. Each subsystem should store the 63 * handle to their rctl for direct access. 64 * 65 * A primary dictionary, rctl_dict, contains a hash of id to the default 66 * control definition for each controlled resource-entity pair on the system. 67 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 68 * resource control handles. The resource control handles are distributed by 69 * the rctl_ids ID space. The handles are private and not to be 70 * advertised to userland; all userland interactions are via the rctl 71 * names. 72 * 73 * Entities inherit their rctls from their predecessor. Since projects have 74 * no ancestor, they inherit their rctls from the rctl dict for project 75 * rctls. It is expected that project controls will be set to their 76 * appropriate values shortly after project creation, presumably from a 77 * policy source such as the project database. 78 * 79 * Data structures 80 * The rctl_set_t attached to each of the process model entities is a simple 81 * hash table keyed on the rctl handle assigned at registration. The entries 82 * in the hash table are rctl_t's, whose relationship with the active control 83 * values on that resource and with the global state of the resource we 84 * illustrate below: 85 * 86 * rctl_dict[key] --> rctl_dict_entry 87 * ^ 88 * | 89 * +--+---+ 90 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL 91 * +--+---+ ^ 92 * | | 93 * +------- cursor ------+ 94 * 95 * That is, the rctl contains a back pointer to the global resource control 96 * state for this resource, which is also available in the rctl_dict hash 97 * table mentioned earlier. The rctl contains two pointers to resource 98 * control values: one, values, indicates the entire sequence of control 99 * values; the other, cursor, indicates the currently active control 100 * value--the next value to be enforced. The value list itself is an open, 101 * doubly-linked list, the last non-NULL member of which is the system value 102 * for that resource (being the theoretical/conventional maximum allowable 103 * value for the resource on this OS instance). 104 * 105 * Ops Vector 106 * Subsystems publishing rctls need not provide instances of all of the 107 * functions specified by the ops vector. In particular, if general 108 * rctl_*() entry points are not being called, certain functions can be 109 * omitted. These align as follows: 110 * 111 * rctl_set() 112 * You may wish to provide a set callback if locking circumstances prevent 113 * it or if the performance cost of requesting the enforced value from the 114 * resource control is prohibitively expensive. For instance, the currently 115 * enforced file size limit is stored on the process in the p_fsz_ctl to 116 * maintain read()/write() performance. 117 * 118 * rctl_test() 119 * You must provide a test callback if you are using the rctl_test() 120 * interface. An action callback is optional. 121 * 122 * rctl_action() 123 * You may wish to provide an action callback. 124 * 125 * Registration 126 * New resource controls can be added to a running instance by loaded modules 127 * via registration. (The current implementation does not support unloadable 128 * modules; this functionality can be added if needed, via an 129 * activation/deactivation interface involving the manipulation of the 130 * ops vector for the resource control(s) needing to support unloading.) 131 * 132 * Control value ordering 133 * Because the rctl_val chain on each rctl must be navigable in a 134 * deterministic way, we have to define an ordering on the rctl_val_t's. The 135 * defined order is (flags & [maximal], value, flags & [deny-action], 136 * privilege). 137 * 138 * Locking 139 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since 140 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point 141 * of any subsystem, holding subsystem locks, it is at all times inappropriate 142 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks. 143 * Traversing any of the various resource control entity lists requires 144 * holding rctl_lists_lock. 145 * 146 * Each individual resource control set associated with an entity must have 147 * its rcs_lock held for the duration of any operations that would add 148 * resource controls or control values to the set. 149 * 150 * The locking subsequence of interest is: p_lock, rctl_dict_lock, 151 * rctl_lists_lock, entity->rcs_lock. 152 */ 153 154 id_t max_rctl_hndl = 32768; 155 int rctl_dict_size = 64; 156 int rctl_set_size = 8; 157 kmutex_t rctl_dict_lock; 158 mod_hash_t *rctl_dict; 159 mod_hash_t *rctl_dict_by_name; 160 id_space_t *rctl_ids; 161 kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */ 162 kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */ 163 164 kmutex_t rctl_lists_lock; 165 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1]; 166 167 /* 168 * Default resource control operations and ops vector 169 * To be used if the particular rcontrol has no specific actions defined, or 170 * if the subsystem providing the control is quiescing (in preparation for 171 * unloading, presumably.) 172 * 173 * Resource controls with callbacks should fill the unused operations with the 174 * appropriate default impotent callback. 175 */ 176 /*ARGSUSED*/ 177 void 178 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e) 179 { 180 } 181 182 /*ARGSUSED*/ 183 rctl_qty_t 184 rcop_no_usage(struct rctl *r, struct proc *p) 185 { 186 return (0); 187 } 188 189 /*ARGSUSED*/ 190 int 191 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l) 192 { 193 return (0); 194 } 195 196 /*ARGSUSED*/ 197 int 198 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 199 struct rctl_val *rv, rctl_qty_t i, uint_t f) 200 { 201 return (0); 202 } 203 204 rctl_ops_t rctl_default_ops = { 205 rcop_no_action, 206 rcop_no_usage, 207 rcop_no_set, 208 rcop_no_test 209 }; 210 211 /* 212 * Default "absolute" resource control operation and ops vector 213 * Useful if there is no usage associated with the 214 * resource control. 215 */ 216 /*ARGSUSED*/ 217 int 218 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 219 struct rctl_val *rv, rctl_qty_t i, uint_t f) 220 { 221 return (i > rv->rcv_value); 222 } 223 224 rctl_ops_t rctl_absolute_ops = { 225 rcop_no_action, 226 rcop_no_usage, 227 rcop_no_set, 228 rcop_absolute_test 229 }; 230 231 /*ARGSUSED*/ 232 static uint_t 233 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 234 { 235 return ((uint_t)(uintptr_t)key % rctl_dict_size); 236 } 237 238 static int 239 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 240 { 241 uint_t u1 = (uint_t)(uintptr_t)key1; 242 uint_t u2 = (uint_t)(uintptr_t)key2; 243 244 if (u1 > u2) 245 return (1); 246 247 if (u1 == u2) 248 return (0); 249 250 return (-1); 251 } 252 253 static void 254 rctl_dict_val_dtor(mod_hash_val_t val) 255 { 256 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 257 258 kmem_free(kr, sizeof (rctl_dict_entry_t)); 259 } 260 261 /* 262 * size_t rctl_build_name_buf() 263 * 264 * Overview 265 * rctl_build_name_buf() walks all active resource controls in the dictionary, 266 * building a buffer of continguous NUL-terminated strings. 267 * 268 * Return values 269 * The size of the buffer is returned, the passed pointer's contents are 270 * modified to that of the location of the buffer. 271 * 272 * Caller's context 273 * Caller must be in a context suitable for KM_SLEEP allocations. 274 */ 275 size_t 276 rctl_build_name_buf(char **rbufp) 277 { 278 size_t req_size, cpy_size; 279 char *rbufloc; 280 int i; 281 282 rctl_rebuild_name_buf: 283 req_size = cpy_size = 0; 284 285 /* 286 * Calculate needed buffer length. 287 */ 288 mutex_enter(&rctl_lists_lock); 289 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 290 rctl_dict_entry_t *rde; 291 292 for (rde = rctl_lists[i]; 293 rde != NULL; 294 rde = rde->rcd_next) 295 req_size += strlen(rde->rcd_name) + 1; 296 } 297 mutex_exit(&rctl_lists_lock); 298 299 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 300 301 /* 302 * Copy rctl names into our buffer. If the copy length exceeds the 303 * allocate length (due to registration changes), stop copying, free the 304 * buffer, and start again. 305 */ 306 mutex_enter(&rctl_lists_lock); 307 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 308 rctl_dict_entry_t *rde; 309 310 for (rde = rctl_lists[i]; 311 rde != NULL; 312 rde = rde->rcd_next) { 313 size_t length = strlen(rde->rcd_name) + 1; 314 315 cpy_size += length; 316 317 if (cpy_size > req_size) { 318 kmem_free(*rbufp, req_size); 319 mutex_exit(&rctl_lists_lock); 320 goto rctl_rebuild_name_buf; 321 } 322 323 bcopy(rde->rcd_name, rbufloc, length); 324 rbufloc += length; 325 } 326 } 327 mutex_exit(&rctl_lists_lock); 328 329 return (req_size); 330 } 331 332 /* 333 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 334 * 335 * Overview 336 * rctl_dict_lookup() returns the resource control dictionary entry for the 337 * named resource control. 338 * 339 * Return values 340 * A pointer to the appropriate resource control dictionary entry, or NULL if 341 * no such named entry exists. 342 * 343 * Caller's context 344 * Caller must not be holding rctl_dict_lock. 345 */ 346 rctl_dict_entry_t * 347 rctl_dict_lookup(const char *name) 348 { 349 rctl_dict_entry_t *rde; 350 351 mutex_enter(&rctl_dict_lock); 352 353 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 354 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 355 mutex_exit(&rctl_dict_lock); 356 return (NULL); 357 } 358 359 mutex_exit(&rctl_dict_lock); 360 361 return (rde); 362 } 363 364 /* 365 * rctl_hndl_t rctl_hndl_lookup(const char *) 366 * 367 * Overview 368 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 369 * named resource control. 370 * 371 * Return values 372 * The appropriate id, or -1 if no such named entry exists. 373 * 374 * Caller's context 375 * Caller must not be holding rctl_dict_lock. 376 */ 377 rctl_hndl_t 378 rctl_hndl_lookup(const char *name) 379 { 380 rctl_dict_entry_t *rde; 381 382 if ((rde = rctl_dict_lookup(name)) == NULL) 383 return (-1); 384 385 return (rde->rcd_id); 386 } 387 388 /* 389 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 390 * 391 * Overview 392 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 393 * the resource control dictionary entry matching a given resource control id. 394 * 395 * Return values 396 * A pointer to the matching resource control dictionary entry, or NULL if the 397 * id does not match any existing entries. 398 * 399 * Caller's context 400 * Caller must not be holding rctl_lists_lock. 401 */ 402 rctl_dict_entry_t * 403 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 404 { 405 uint_t i; 406 407 mutex_enter(&rctl_lists_lock); 408 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 409 rctl_dict_entry_t *rde; 410 411 for (rde = rctl_lists[i]; 412 rde != NULL; 413 rde = rde->rcd_next) 414 if (rde->rcd_id == hndl) { 415 mutex_exit(&rctl_lists_lock); 416 return (rde); 417 } 418 } 419 mutex_exit(&rctl_lists_lock); 420 421 return (NULL); 422 } 423 424 /* 425 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 426 * rctl_priv_t privilege, uint_t action) 427 * 428 * Overview 429 * Create a default limit with specified value, privilege, and action. 430 * 431 * Return value 432 * No value returned. 433 */ 434 void 435 rctl_add_default_limit(const char *name, rctl_qty_t value, 436 rctl_priv_t privilege, uint_t action) 437 { 438 rctl_val_t *dval; 439 rctl_dict_entry_t *rde; 440 441 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 442 bzero(dval, sizeof (rctl_val_t)); 443 dval->rcv_value = value; 444 dval->rcv_privilege = privilege; 445 dval->rcv_flagaction = action; 446 dval->rcv_action_recip_pid = -1; 447 448 rde = rctl_dict_lookup(name); 449 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 450 } 451 452 /* 453 * void rctl_add_legacy_limit(const char *name, const char *mname, 454 * const char *lname, rctl_qty_t dflt) 455 * 456 * Overview 457 * Create a default privileged limit, using the value obtained from 458 * /etc/system if it exists and is greater than the specified default 459 * value. Exists primarily for System V IPC. 460 * 461 * Return value 462 * No value returned. 463 */ 464 void 465 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 466 rctl_qty_t dflt, rctl_qty_t max) 467 { 468 rctl_qty_t qty; 469 470 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 471 qty = dflt; 472 473 if (qty > max) 474 qty = max; 475 476 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 477 } 478 479 static rctl_set_t * 480 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 481 { 482 rctl_set_t *rset = NULL; 483 484 if (rcd == NULL) 485 return (NULL); 486 487 switch (rcd->rcd_entity) { 488 case RCENTITY_PROCESS: 489 rset = p->p_rctls; 490 break; 491 case RCENTITY_TASK: 492 ASSERT(MUTEX_HELD(&p->p_lock)); 493 if (p->p_task != NULL) 494 rset = p->p_task->tk_rctls; 495 break; 496 case RCENTITY_PROJECT: 497 ASSERT(MUTEX_HELD(&p->p_lock)); 498 if (p->p_task != NULL && 499 p->p_task->tk_proj != NULL) 500 rset = p->p_task->tk_proj->kpj_rctls; 501 break; 502 case RCENTITY_ZONE: 503 ASSERT(MUTEX_HELD(&p->p_lock)); 504 if (p->p_zone != NULL) 505 rset = p->p_zone->zone_rctls; 506 break; 507 default: 508 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 509 break; 510 } 511 512 return (rset); 513 } 514 515 static void 516 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 517 rctl_entity_p_t *e) 518 { 519 e->rcep_p.proc = NULL; 520 e->rcep_t = entity; 521 522 switch (entity) { 523 case RCENTITY_PROCESS: 524 e->rcep_p.proc = p; 525 break; 526 case RCENTITY_TASK: 527 ASSERT(MUTEX_HELD(&p->p_lock)); 528 if (p->p_task != NULL) 529 e->rcep_p.task = p->p_task; 530 break; 531 case RCENTITY_PROJECT: 532 ASSERT(MUTEX_HELD(&p->p_lock)); 533 if (p->p_task != NULL && 534 p->p_task->tk_proj != NULL) 535 e->rcep_p.proj = p->p_task->tk_proj; 536 break; 537 case RCENTITY_ZONE: 538 ASSERT(MUTEX_HELD(&p->p_lock)); 539 if (p->p_zone != NULL) 540 e->rcep_p.zone = p->p_zone; 541 break; 542 default: 543 panic("unknown rctl entity type %d seen", entity); 544 break; 545 } 546 } 547 548 static void 549 rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 550 { 551 uint_t i; 552 553 if (rcgp->rcag_nctls > 0) { 554 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 555 rctl_t *rctl = prev; 556 557 rcgp->rcag_ctls = prev; 558 559 for (i = 1; i < rcgp->rcag_nctls; i++) { 560 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 561 prev->rc_next = rctl; 562 prev = rctl; 563 } 564 565 rctl->rc_next = NULL; 566 } 567 568 if (rcgp->rcag_nvals > 0) { 569 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 570 rctl_val_t *rval = prev; 571 572 rcgp->rcag_vals = prev; 573 574 for (i = 1; i < rcgp->rcag_nvals; i++) { 575 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 576 prev->rcv_next = rval; 577 prev = rval; 578 } 579 580 rval->rcv_next = NULL; 581 } 582 583 } 584 585 static rctl_val_t * 586 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 587 { 588 rctl_val_t *rval = rcgp->rcag_vals; 589 590 ASSERT(rcgp->rcag_nvals > 0); 591 rcgp->rcag_nvals--; 592 rcgp->rcag_vals = rval->rcv_next; 593 594 rval->rcv_next = NULL; 595 596 return (rval); 597 } 598 599 static rctl_t * 600 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 601 { 602 rctl_t *rctl = rcgp->rcag_ctls; 603 604 ASSERT(rcgp->rcag_nctls > 0); 605 rcgp->rcag_nctls--; 606 rcgp->rcag_ctls = rctl->rc_next; 607 608 rctl->rc_next = NULL; 609 610 return (rctl); 611 612 } 613 614 static void 615 rctl_gp_free(rctl_alloc_gp_t *rcgp) 616 { 617 rctl_val_t *rval = rcgp->rcag_vals; 618 rctl_t *rctl = rcgp->rcag_ctls; 619 620 while (rval != NULL) { 621 rctl_val_t *next = rval->rcv_next; 622 623 kmem_cache_free(rctl_val_cache, rval); 624 rval = next; 625 } 626 627 while (rctl != NULL) { 628 rctl_t *next = rctl->rc_next; 629 630 kmem_cache_free(rctl_cache, rctl); 631 rctl = next; 632 } 633 } 634 635 /* 636 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 637 * 638 * Overview 639 * Release all unused memory allocated via one of the "prealloc" functions: 640 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 641 * 642 * Return values 643 * None. 644 * 645 * Caller's context 646 * No restrictions on context. 647 */ 648 void 649 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 650 { 651 rctl_gp_free(gp); 652 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 653 } 654 655 /* 656 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 657 * 658 * Overview 659 * This function defines an ordering to rctl_val_t's in order to allow 660 * for correct placement in value lists. When the imprecise flag is set, 661 * the action recipient is ignored. This is to facilitate insert, 662 * delete, and replace operations by rctlsys. 663 * 664 * Return values 665 * 0 if the val_t's are are considered identical 666 * -1 if a is ordered lower than b 667 * 1 if a is lowered higher than b 668 * 669 * Caller's context 670 * No restrictions on context. 671 */ 672 int 673 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 674 { 675 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 676 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 677 return (-1); 678 679 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 680 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 681 return (1); 682 683 if (a->rcv_value < b->rcv_value) 684 return (-1); 685 686 if (a->rcv_value > b->rcv_value) 687 return (1); 688 689 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 690 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 691 return (-1); 692 693 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 694 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 695 return (1); 696 697 if (a->rcv_privilege < b->rcv_privilege) 698 return (-1); 699 700 if (a->rcv_privilege > b->rcv_privilege) 701 return (1); 702 703 if (imprecise) 704 return (0); 705 706 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 707 return (-1); 708 709 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 710 return (1); 711 712 return (0); 713 } 714 715 static rctl_val_t * 716 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 717 { 718 rctl_val_t *rval = *head; 719 720 while (rval != NULL) { 721 if (rctl_val_cmp(cval, rval, 0) == 0) 722 return (rval); 723 724 rval = rval->rcv_next; 725 } 726 727 return (NULL); 728 729 } 730 731 /* 732 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 733 * 734 * Overview 735 * This function inserts the rctl_val_t into the value list provided. 736 * The insert is always successful unless if the value is a duplicate 737 * of one already in the list. 738 * 739 * Return values 740 * 1 if the value was a duplicate of an existing value in the list. 741 * 0 if the insert was successful. 742 */ 743 int 744 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 745 { 746 rctl_val_t *prev; 747 int equiv; 748 749 rval->rcv_next = NULL; 750 rval->rcv_prev = NULL; 751 752 if (*root == NULL) { 753 *root = rval; 754 return (0); 755 } 756 757 equiv = rctl_val_cmp(rval, *root, 0); 758 759 if (equiv == 0) 760 return (1); 761 762 if (equiv < 0) { 763 rval->rcv_next = *root; 764 rval->rcv_next->rcv_prev = rval; 765 *root = rval; 766 767 return (0); 768 } 769 770 prev = *root; 771 while (prev->rcv_next != NULL && 772 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 773 prev = prev->rcv_next; 774 } 775 776 if (equiv == 0) 777 return (1); 778 779 rval->rcv_next = prev->rcv_next; 780 if (rval->rcv_next != NULL) 781 rval->rcv_next->rcv_prev = rval; 782 prev->rcv_next = rval; 783 rval->rcv_prev = prev; 784 785 return (0); 786 } 787 788 static int 789 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 790 { 791 rctl_val_t *prev; 792 793 if (*root == NULL) 794 return (-1); 795 796 prev = *root; 797 if (rctl_val_cmp(rval, prev, 0) == 0) { 798 *root = prev->rcv_next; 799 (*root)->rcv_prev = NULL; 800 801 kmem_cache_free(rctl_val_cache, prev); 802 803 return (0); 804 } 805 806 while (prev->rcv_next != NULL && 807 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 808 prev = prev->rcv_next; 809 } 810 811 if (prev->rcv_next == NULL) { 812 /* 813 * If we navigate the entire list and cannot find a match, then 814 * return failure. 815 */ 816 return (-1); 817 } 818 819 prev = prev->rcv_next; 820 prev->rcv_prev->rcv_next = prev->rcv_next; 821 if (prev->rcv_next != NULL) 822 prev->rcv_next->rcv_prev = prev->rcv_prev; 823 824 kmem_cache_free(rctl_val_cache, prev); 825 826 return (0); 827 } 828 829 static rctl_val_t * 830 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 831 struct proc *newp) 832 { 833 rctl_val_t *head = NULL; 834 835 for (; rval != NULL; rval = rval->rcv_next) { 836 rctl_val_t *dval = rctl_gp_detach_val(ragp); 837 838 bcopy(rval, dval, sizeof (rctl_val_t)); 839 dval->rcv_prev = dval->rcv_next = NULL; 840 841 if (oldp == NULL || 842 rval->rcv_action_recipient == NULL || 843 rval->rcv_action_recipient == oldp) { 844 if (rval->rcv_privilege == RCPRIV_BASIC) { 845 dval->rcv_action_recipient = newp; 846 dval->rcv_action_recip_pid = newp->p_pid; 847 } else { 848 dval->rcv_action_recipient = NULL; 849 dval->rcv_action_recip_pid = -1; 850 } 851 852 (void) rctl_val_list_insert(&head, dval); 853 } else { 854 kmem_cache_free(rctl_val_cache, dval); 855 } 856 } 857 858 return (head); 859 } 860 861 static void 862 rctl_val_list_reset(rctl_val_t *rval) 863 { 864 for (; rval != NULL; rval = rval->rcv_next) 865 rval->rcv_firing_time = 0; 866 } 867 868 static uint_t 869 rctl_val_list_count(rctl_val_t *rval) 870 { 871 uint_t n = 0; 872 873 for (; rval != NULL; rval = rval->rcv_next) 874 n++; 875 876 return (n); 877 } 878 879 880 static void 881 rctl_val_list_free(rctl_val_t *rval) 882 { 883 while (rval != NULL) { 884 rctl_val_t *next = rval->rcv_next; 885 886 kmem_cache_free(rctl_val_cache, rval); 887 888 rval = next; 889 } 890 } 891 892 /* 893 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 894 * 895 * Overview 896 * In cases where the operating system supports more than one process 897 * addressing model, the operating system capabilities will exceed those of 898 * one or more of these models. Processes in a less capable model must have 899 * their resources accurately controlled, without diluting those of their 900 * descendants reached via exec(). rctl_model_maximum() returns the governing 901 * value for the specified process with respect to a resource control, such 902 * that the value can used for the RCTLOP_SET callback or compatability 903 * support. 904 * 905 * Return values 906 * The maximum value for the given process for the specified resource control. 907 * 908 * Caller's context 909 * No restrictions on context. 910 */ 911 rctl_qty_t 912 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 913 { 914 if (p->p_model == DATAMODEL_NATIVE) 915 return (rde->rcd_max_native); 916 917 return (rde->rcd_max_ilp32); 918 } 919 920 /* 921 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 922 * 923 * Overview 924 * Convenience function wrapping the rctl_model_maximum() functionality. 925 * 926 * Return values 927 * The lesser of the process's maximum value and the given value for the 928 * specified resource control. 929 * 930 * Caller's context 931 * No restrictions on context. 932 */ 933 rctl_qty_t 934 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 935 { 936 rctl_qty_t max = rctl_model_maximum(rde, p); 937 938 return (value < max ? value : max); 939 } 940 941 static void 942 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 943 { 944 uint_t index = hndl % rctl_set_size; 945 rctl_t *next_ctl, *prev_ctl; 946 947 ASSERT(MUTEX_HELD(&set->rcs_lock)); 948 949 rctl->rc_next = NULL; 950 951 if (set->rcs_ctls[index] == NULL) { 952 set->rcs_ctls[index] = rctl; 953 return; 954 } 955 956 if (hndl < set->rcs_ctls[index]->rc_id) { 957 rctl->rc_next = set->rcs_ctls[index]; 958 set->rcs_ctls[index] = rctl; 959 960 return; 961 } 962 963 for (next_ctl = set->rcs_ctls[index]->rc_next, 964 prev_ctl = set->rcs_ctls[index]; 965 next_ctl != NULL; 966 prev_ctl = next_ctl, 967 next_ctl = next_ctl->rc_next) { 968 if (next_ctl->rc_id > hndl) { 969 rctl->rc_next = next_ctl; 970 prev_ctl->rc_next = rctl; 971 972 return; 973 } 974 } 975 976 rctl->rc_next = next_ctl; 977 prev_ctl->rc_next = rctl; 978 } 979 980 /* 981 * rctl_set_t *rctl_set_create() 982 * 983 * Overview 984 * Create an empty resource control set, suitable for attaching to a 985 * controlled entity. 986 * 987 * Return values 988 * A pointer to the newly created set. 989 * 990 * Caller's context 991 * Safe for KM_SLEEP allocations. 992 */ 993 rctl_set_t * 994 rctl_set_create() 995 { 996 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 997 998 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 999 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1000 KM_SLEEP); 1001 rset->rcs_entity = -1; 1002 1003 return (rset); 1004 } 1005 1006 /* 1007 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1008 * 1009 * Overview 1010 * rctl_set_init_prealloc() examines the globally defined resource controls 1011 * and their default values and returns a resource control allocation group 1012 * populated with sufficient controls and values to form a representative 1013 * resource control set for the specified entity. 1014 * 1015 * Return values 1016 * A pointer to the newly created allocation group. 1017 * 1018 * Caller's context 1019 * Caller must be in a context suitable for KM_SLEEP allocations. 1020 */ 1021 rctl_alloc_gp_t * 1022 rctl_set_init_prealloc(rctl_entity_t entity) 1023 { 1024 rctl_dict_entry_t *rde; 1025 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1026 1027 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1028 1029 if (rctl_lists[entity] == NULL) 1030 return (ragp); 1031 1032 mutex_enter(&rctl_lists_lock); 1033 1034 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1035 ragp->rcag_nctls++; 1036 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1037 } 1038 1039 mutex_exit(&rctl_lists_lock); 1040 1041 rctl_gp_alloc(ragp); 1042 1043 return (ragp); 1044 } 1045 1046 /* 1047 * rctl_set_t *rctl_set_init(rctl_entity_t) 1048 * 1049 * Overview 1050 * rctl_set_create() creates a resource control set, initialized with the 1051 * system infinite values on all registered controls, for attachment to a 1052 * system entity requiring resource controls, such as a process or a task. 1053 * 1054 * Return values 1055 * A pointer to the newly filled set. 1056 * 1057 * Caller's context 1058 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1059 * may modify task and project members based on the proc structure 1060 * they are passed. 1061 */ 1062 rctl_set_t * 1063 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1064 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1065 { 1066 rctl_dict_entry_t *rde; 1067 1068 ASSERT(MUTEX_HELD(&p->p_lock)); 1069 ASSERT(e); 1070 rset->rcs_entity = entity; 1071 1072 if (rctl_lists[entity] == NULL) 1073 return (rset); 1074 1075 mutex_enter(&rctl_lists_lock); 1076 mutex_enter(&rset->rcs_lock); 1077 1078 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1079 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1080 1081 rctl->rc_dict_entry = rde; 1082 rctl->rc_id = rde->rcd_id; 1083 1084 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1085 ragp, NULL, p); 1086 rctl->rc_cursor = rctl->rc_values; 1087 1088 ASSERT(rctl->rc_cursor != NULL); 1089 1090 rctl_set_insert(rset, rde->rcd_id, rctl); 1091 1092 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1093 rctl->rc_cursor->rcv_value)); 1094 } 1095 1096 mutex_exit(&rset->rcs_lock); 1097 mutex_exit(&rctl_lists_lock); 1098 1099 return (rset); 1100 } 1101 1102 static rctl_t * 1103 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1104 struct proc *newp) 1105 { 1106 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1107 rctl_val_t *dval; 1108 1109 dup->rc_id = rctl->rc_id; 1110 dup->rc_dict_entry = rctl->rc_dict_entry; 1111 dup->rc_next = NULL; 1112 dup->rc_cursor = NULL; 1113 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1114 1115 for (dval = dup->rc_values; 1116 dval != NULL; dval = dval->rcv_next) { 1117 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1118 dup->rc_cursor = dval; 1119 break; 1120 } 1121 } 1122 1123 if (dup->rc_cursor == NULL) 1124 dup->rc_cursor = dup->rc_values; 1125 1126 return (dup); 1127 } 1128 1129 static void 1130 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1131 { 1132 uint_t i; 1133 1134 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1135 1136 for (i = 0; i < rctl_set_size; i++) { 1137 rctl_t *r = set->rcs_ctls[i]; 1138 1139 while (r != NULL) { 1140 ragp->rcag_nctls++; 1141 1142 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1143 1144 r = r->rc_next; 1145 } 1146 } 1147 } 1148 1149 /* 1150 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1151 * 1152 * Overview 1153 * Given a resource control set, allocate a sufficiently large allocation 1154 * group to contain a duplicate of the set. 1155 * 1156 * Return value 1157 * A pointer to the newly created allocation group. 1158 * 1159 * Caller's context 1160 * Safe for KM_SLEEP allocations. 1161 */ 1162 rctl_alloc_gp_t * 1163 rctl_set_dup_prealloc(rctl_set_t *set) 1164 { 1165 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1166 1167 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1168 1169 mutex_enter(&set->rcs_lock); 1170 rctl_set_fill_alloc_gp(set, ragp); 1171 mutex_exit(&set->rcs_lock); 1172 1173 rctl_gp_alloc(ragp); 1174 1175 return (ragp); 1176 } 1177 1178 /* 1179 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1180 * 1181 * Overview 1182 * Verify that the allocation group provided is large enough to allow a 1183 * duplicate of the given resource control set to be constructed from its 1184 * contents. 1185 * 1186 * Return values 1187 * 1 if the allocation group is sufficiently large, 0 otherwise. 1188 * 1189 * Caller's context 1190 * rcs_lock must be held prior to entry. 1191 */ 1192 int 1193 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1194 { 1195 rctl_alloc_gp_t curr_gp; 1196 1197 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1198 1199 rctl_set_fill_alloc_gp(set, &curr_gp); 1200 1201 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1202 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1203 return (1); 1204 1205 return (0); 1206 } 1207 1208 /* 1209 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1210 * rctl_set_t *, rctl_alloc_gp_t *, int) 1211 * 1212 * Overview 1213 * Make a duplicate of the resource control set. The proc pointers are those 1214 * of the owning process and of the process associated with the entity 1215 * receiving the duplicate. 1216 * 1217 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1218 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1219 * Stage 2 consists of copying all rctls and values from the old set into 1220 * the new. Stage 3 completes the duplication by performing the appropriate 1221 * callbacks for each rctl in the new set. 1222 * 1223 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1224 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1225 * be supplied if the newp proc structure reflects the new task and 1226 * project linkage. 1227 * 1228 * Return value 1229 * A pointer to the duplicate set. 1230 * 1231 * Caller's context 1232 * The rcs_lock of the set to be duplicated must be held prior to entry. 1233 */ 1234 rctl_set_t * 1235 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1236 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1237 { 1238 uint_t i; 1239 rctl_set_t *iter; 1240 1241 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1242 ASSERT(e); 1243 /* 1244 * When copying the old set, iterate over that. Otherwise, when 1245 * only callbacks have been requested, iterate over the dup set. 1246 */ 1247 if (flag & RCD_DUP) { 1248 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1249 iter = set; 1250 dup->rcs_entity = set->rcs_entity; 1251 } else { 1252 iter = dup; 1253 } 1254 1255 mutex_enter(&dup->rcs_lock); 1256 1257 for (i = 0; i < rctl_set_size; i++) { 1258 rctl_t *r = iter->rcs_ctls[i]; 1259 rctl_t *d; 1260 1261 while (r != NULL) { 1262 if (flag & RCD_DUP) { 1263 d = rctl_dup(r, ragp, oldp, newp); 1264 rctl_set_insert(dup, r->rc_id, d); 1265 } else { 1266 d = r; 1267 } 1268 1269 if (flag & RCD_CALLBACK) 1270 RCTLOP_SET(d, newp, e, 1271 rctl_model_value(d->rc_dict_entry, newp, 1272 d->rc_cursor->rcv_value)); 1273 1274 r = r->rc_next; 1275 } 1276 } 1277 1278 mutex_exit(&dup->rcs_lock); 1279 1280 return (dup); 1281 } 1282 1283 /* 1284 * void rctl_set_free(rctl_set_t *) 1285 * 1286 * Overview 1287 * Delete resource control set and all attached values. 1288 * 1289 * Return values 1290 * No value returned. 1291 * 1292 * Caller's context 1293 * No restrictions on context. 1294 */ 1295 void 1296 rctl_set_free(rctl_set_t *set) 1297 { 1298 uint_t i; 1299 1300 mutex_enter(&set->rcs_lock); 1301 for (i = 0; i < rctl_set_size; i++) { 1302 rctl_t *r = set->rcs_ctls[i]; 1303 1304 while (r != NULL) { 1305 rctl_val_t *v = r->rc_values; 1306 rctl_t *n = r->rc_next; 1307 1308 kmem_cache_free(rctl_cache, r); 1309 1310 rctl_val_list_free(v); 1311 1312 r = n; 1313 } 1314 } 1315 mutex_exit(&set->rcs_lock); 1316 1317 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1318 kmem_free(set, sizeof (rctl_set_t)); 1319 } 1320 1321 /* 1322 * void rctl_set_reset(rctl_set_t *) 1323 * 1324 * Overview 1325 * Resets all rctls within the set such that the lowest value becomes active. 1326 * 1327 * Return values 1328 * No value returned. 1329 * 1330 * Caller's context 1331 * No restrictions on context. 1332 */ 1333 void 1334 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1335 { 1336 uint_t i; 1337 1338 ASSERT(e); 1339 1340 mutex_enter(&set->rcs_lock); 1341 for (i = 0; i < rctl_set_size; i++) { 1342 rctl_t *r = set->rcs_ctls[i]; 1343 1344 while (r != NULL) { 1345 r->rc_cursor = r->rc_values; 1346 rctl_val_list_reset(r->rc_cursor); 1347 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1348 p, r->rc_cursor->rcv_value)); 1349 1350 ASSERT(r->rc_cursor != NULL); 1351 1352 r = r->rc_next; 1353 } 1354 } 1355 1356 mutex_exit(&set->rcs_lock); 1357 } 1358 1359 /* 1360 * void rctl_set_tearoff(rctl_set *, struct proc *) 1361 * 1362 * Overview 1363 * Tear off any resource control values on this set with an action recipient 1364 * equal to the specified process (as they are becoming invalid with the 1365 * process's departure from this set as an observer). 1366 * 1367 * Return values 1368 * No value returned. 1369 * 1370 * Caller's context 1371 * No restrictions on context 1372 */ 1373 void 1374 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1375 { 1376 uint_t i; 1377 1378 mutex_enter(&set->rcs_lock); 1379 for (i = 0; i < rctl_set_size; i++) { 1380 rctl_t *r = set->rcs_ctls[i]; 1381 1382 while (r != NULL) { 1383 rctl_val_t *rval; 1384 1385 tearoff_rewalk_list: 1386 rval = r->rc_values; 1387 1388 while (rval != NULL) { 1389 if (rval->rcv_privilege == RCPRIV_BASIC && 1390 rval->rcv_action_recipient == p) { 1391 if (r->rc_cursor == rval) 1392 r->rc_cursor = rval->rcv_next; 1393 1394 (void) rctl_val_list_delete( 1395 &r->rc_values, rval); 1396 1397 goto tearoff_rewalk_list; 1398 } 1399 1400 rval = rval->rcv_next; 1401 } 1402 1403 ASSERT(r->rc_cursor != NULL); 1404 1405 r = r->rc_next; 1406 } 1407 } 1408 1409 mutex_exit(&set->rcs_lock); 1410 } 1411 1412 static int 1413 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1414 { 1415 uint_t index = hndl % rctl_set_size; 1416 rctl_t *curr_ctl; 1417 1418 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1419 1420 for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1421 curr_ctl = curr_ctl->rc_next) { 1422 if (curr_ctl->rc_id == hndl) { 1423 *rctl = curr_ctl; 1424 1425 return (0); 1426 } 1427 } 1428 1429 return (-1); 1430 } 1431 1432 /* 1433 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1434 * 1435 * Overview 1436 * Given a process, get the next enforced value on the rctl of the specified 1437 * handle. 1438 * 1439 * Return value 1440 * The enforced value. 1441 * 1442 * Caller's context 1443 * For controls on process collectives, p->p_lock must be held across the 1444 * operation. 1445 */ 1446 /*ARGSUSED*/ 1447 rctl_qty_t 1448 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1449 { 1450 rctl_t *rctl; 1451 rlim64_t ret; 1452 1453 mutex_enter(&rset->rcs_lock); 1454 1455 if (rctl_set_find(rset, hndl, &rctl) == -1) 1456 panic("unknown resource control handle %d requested", hndl); 1457 else 1458 ret = rctl_model_value(rctl->rc_dict_entry, p, 1459 rctl->rc_cursor->rcv_value); 1460 1461 mutex_exit(&rset->rcs_lock); 1462 1463 return (ret); 1464 } 1465 1466 /* 1467 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1468 * 1469 * Overview 1470 * Copy a sanitized version of the global rctl for a given resource control 1471 * name. (By sanitization, we mean that the unsafe data pointers have been 1472 * zeroed.) 1473 * 1474 * Return value 1475 * -1 if name not defined, 0 otherwise. 1476 * 1477 * Caller's context 1478 * No restrictions on context. rctl_dict_lock must not be held. 1479 */ 1480 int 1481 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1482 { 1483 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1484 1485 if (rde == NULL) 1486 return (-1); 1487 1488 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1489 1490 drde->rcd_next = NULL; 1491 drde->rcd_ops = NULL; 1492 1493 return (0); 1494 } 1495 1496 /* 1497 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1498 * 1499 * Overview 1500 * Transfer the settable fields of the named rctl to the global rctl matching 1501 * the given resource control name. 1502 * 1503 * Return value 1504 * -1 if name not defined, 0 otherwise. 1505 * 1506 * Caller's context 1507 * No restrictions on context. rctl_dict_lock must not be held. 1508 */ 1509 int 1510 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1511 { 1512 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1513 1514 if (rde == NULL) 1515 return (-1); 1516 1517 rde->rcd_flagaction = drde->rcd_flagaction; 1518 rde->rcd_syslog_level = drde->rcd_syslog_level; 1519 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1520 1521 return (0); 1522 } 1523 1524 static int 1525 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1526 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1527 rctl_val_t *, rctl_val_t *), struct proc *p) 1528 { 1529 rctl_t *rctl; 1530 rctl_set_t *rset; 1531 rctl_entity_p_t e; 1532 int ret = 0; 1533 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1534 1535 local_op_retry: 1536 1537 ASSERT(MUTEX_HELD(&p->p_lock)); 1538 1539 rset = rctl_entity_obtain_rset(rde, p); 1540 1541 if (rset == NULL) { 1542 return (-1); 1543 } 1544 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1545 1546 mutex_enter(&rset->rcs_lock); 1547 1548 /* using rctl's hndl, get rctl from local set */ 1549 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1550 mutex_exit(&rset->rcs_lock); 1551 return (-1); 1552 } 1553 1554 ret = cbop(hndl, p, &e, rctl, oval, nval); 1555 1556 mutex_exit(&rset->rcs_lock); 1557 return (ret); 1558 } 1559 1560 /*ARGSUSED*/ 1561 static int 1562 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1563 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1564 { 1565 if (oval == NULL) { 1566 /* 1567 * RCTL_FIRST 1568 */ 1569 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1570 } else { 1571 /* 1572 * RCTL_NEXT 1573 */ 1574 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1575 1576 if (tval == NULL) 1577 return (ESRCH); 1578 else if (tval->rcv_next == NULL) 1579 return (ENOENT); 1580 else 1581 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1582 } 1583 1584 return (0); 1585 } 1586 1587 /* 1588 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1589 * 1590 * Overview 1591 * Get the rctl value for the given flags. 1592 * 1593 * Return values 1594 * 0 for successful get, errno otherwise. 1595 */ 1596 int 1597 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1598 struct proc *p) 1599 { 1600 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1601 } 1602 1603 /*ARGSUSED*/ 1604 static int 1605 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1606 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1607 { 1608 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1609 return (ESRCH); 1610 1611 if (rctl->rc_cursor == oval) { 1612 rctl->rc_cursor = oval->rcv_next; 1613 rctl_val_list_reset(rctl->rc_cursor); 1614 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1615 rctl->rc_cursor->rcv_value)); 1616 1617 ASSERT(rctl->rc_cursor != NULL); 1618 } 1619 1620 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1621 1622 return (0); 1623 } 1624 1625 /* 1626 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1627 * 1628 * Overview 1629 * Delete the rctl value for the given flags. 1630 * 1631 * Return values 1632 * 0 for successful delete, errno otherwise. 1633 */ 1634 int 1635 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1636 { 1637 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1638 } 1639 1640 /* 1641 * rctl_local_insert_cb() 1642 * 1643 * Overview 1644 * Insert a new value into the rctl's val list. If an error occurs, 1645 * the val list must be left in the same state as when the function 1646 * was entered. 1647 * 1648 * Return Values 1649 * 0 for successful insert, EINVAL if the value is duplicated in the 1650 * existing list. 1651 */ 1652 /*ARGSUSED*/ 1653 static int 1654 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1655 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1656 { 1657 /* 1658 * Before inserting, confirm there are no duplicates of this value 1659 * and flag level. If there is a duplicate, flag an error and do 1660 * nothing. 1661 */ 1662 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1663 return (EINVAL); 1664 1665 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1666 rctl->rc_cursor = nval; 1667 rctl_val_list_reset(rctl->rc_cursor); 1668 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1669 rctl->rc_cursor->rcv_value)); 1670 1671 ASSERT(rctl->rc_cursor != NULL); 1672 } 1673 1674 return (0); 1675 } 1676 1677 /* 1678 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1679 * 1680 * Overview 1681 * Insert the rctl value into the appropriate rctl set for the calling 1682 * process, given the handle. 1683 */ 1684 int 1685 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1686 { 1687 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1688 } 1689 1690 static int 1691 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1692 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1693 { 1694 int ret; 1695 1696 /* 1697 * rctl_local_insert_cb() does the job of flagging an error 1698 * for any duplicate values. So, call rctl_local_insert_cb() 1699 * for the new value first, then do deletion of the old value. 1700 * Since this is a callback function to rctl_local_op, we can 1701 * count on rcs_lock being held at this point. This guarantees 1702 * that there is at no point a visible list which contains both 1703 * new and old values. 1704 */ 1705 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 1706 return (ret); 1707 1708 return (rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval)); 1709 } 1710 1711 /* 1712 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 1713 * 1714 * Overview 1715 * Replace the rctl value with a new one. 1716 * 1717 * Return values 1718 * 0 for successful replace, errno otherwise. 1719 */ 1720 int 1721 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1722 struct proc *p) 1723 { 1724 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 1725 } 1726 1727 /* 1728 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 1729 * 1730 * Overview 1731 * To support rlimit compatibility, we need a function which takes a 64-bit 1732 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 1733 * This operation is only intended for legacy rlimits. 1734 */ 1735 int 1736 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 1737 { 1738 rctl_t *rctl; 1739 rctl_val_t *rval; 1740 rctl_set_t *rset = p->p_rctls; 1741 int soft_limit_seen = 0; 1742 int test_for_deny = 1; 1743 1744 mutex_enter(&rset->rcs_lock); 1745 if (rctl_set_find(rset, rc, &rctl) == -1) { 1746 mutex_exit(&rset->rcs_lock); 1747 return (-1); 1748 } 1749 1750 rval = rctl->rc_values; 1751 1752 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 1753 RCTL_GLOBAL_DENY_ALWAYS)) 1754 test_for_deny = 0; 1755 1756 /* 1757 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 1758 */ 1759 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 1760 if (test_for_deny && 1761 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 1762 rval = rval->rcv_next; 1763 continue; 1764 } 1765 1766 /* 1767 * 2. If this is an RCPRIV_BASIC value, then we've found the 1768 * effective soft limit and should set rlim_cur. We should then 1769 * continue looking for another control value with the DENY bit 1770 * set. 1771 */ 1772 if (rval->rcv_privilege == RCPRIV_BASIC) { 1773 if (soft_limit_seen) { 1774 rval = rval->rcv_next; 1775 continue; 1776 } 1777 1778 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 1779 rval->rcv_value < rctl_model_maximum( 1780 rctl->rc_dict_entry, p)) 1781 rlp64->rlim_cur = rval->rcv_value; 1782 else 1783 rlp64->rlim_cur = RLIM64_INFINITY; 1784 soft_limit_seen = 1; 1785 1786 rval = rval->rcv_next; 1787 continue; 1788 } 1789 1790 /* 1791 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 1792 * a soft limit candidate, then we've found the effective hard 1793 * and soft limits and should set both If we had found a soft 1794 * limit, then this is only the hard limit and we need only set 1795 * rlim_max. 1796 */ 1797 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 1798 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 1799 p)) 1800 rlp64->rlim_max = rval->rcv_value; 1801 else 1802 rlp64->rlim_max = RLIM64_INFINITY; 1803 if (!soft_limit_seen) 1804 rlp64->rlim_cur = rlp64->rlim_max; 1805 1806 mutex_exit(&rset->rcs_lock); 1807 return (0); 1808 } 1809 1810 if (rval == NULL) { 1811 /* 1812 * This control sequence is corrupt, as it is not terminated by 1813 * a system privileged control value. 1814 */ 1815 mutex_exit(&rset->rcs_lock); 1816 return (-1); 1817 } 1818 1819 /* 1820 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 1821 * the soft, if we haven't a soft candidate) should be the value of the 1822 * system control value. 1823 */ 1824 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 1825 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 1826 rlp64->rlim_max = rval->rcv_value; 1827 else 1828 rlp64->rlim_max = RLIM64_INFINITY; 1829 1830 if (!soft_limit_seen) 1831 rlp64->rlim_cur = rlp64->rlim_max; 1832 1833 mutex_exit(&rset->rcs_lock); 1834 return (0); 1835 } 1836 1837 /* 1838 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 1839 * 1840 * Overview 1841 * Before making a series of calls to rctl_rlimit_set(), we must have a 1842 * preallocated batch of resource control values, as rctl_rlimit_set() can 1843 * potentially consume two resource control values per call. 1844 * 1845 * Return values 1846 * A populated resource control allocation group with 2n resource control 1847 * values. 1848 * 1849 * Caller's context 1850 * Must be safe for KM_SLEEP allocations. 1851 */ 1852 rctl_alloc_gp_t * 1853 rctl_rlimit_set_prealloc(uint_t n) 1854 { 1855 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1856 1857 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1858 1859 gp->rcag_nvals = 2 * n; 1860 1861 rctl_gp_alloc(gp); 1862 1863 return (gp); 1864 } 1865 1866 /* 1867 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 1868 * int) 1869 * 1870 * Overview 1871 * To support rlimit compatibility, we need a function which takes a 64-bit 1872 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 1873 * This operation is only intended for legacy rlimits. 1874 * 1875 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 1876 * minimize the number of values placed on the value sequence in various 1877 * cases. Furthermore, we don't allow multiple identical privilege-action 1878 * values on the same sequence. (That is, we don't want a sequence like 1879 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 1880 * memory.) So we want to delete any values with the same privilege value and 1881 * action. 1882 * 1883 * Return values 1884 * 0 for successful set, errno otherwise. Errno will be either EINVAL 1885 * or EPERM, in keeping with defined errnos for ulimit() and setrlimit() 1886 * system calls. 1887 */ 1888 /*ARGSUSED*/ 1889 int 1890 rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64, 1891 rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr) 1892 { 1893 rctl_t *rctl; 1894 rctl_val_t *rval, *rval_priv, *rval_basic; 1895 rctl_set_t *rset = p->p_rctls; 1896 rctl_qty_t max; 1897 rctl_entity_p_t e; 1898 struct rlimit64 cur_rl; 1899 1900 e.rcep_t = RCENTITY_PROCESS; 1901 e.rcep_p.proc = p; 1902 1903 if (rlp64->rlim_cur > rlp64->rlim_max) 1904 return (EINVAL); 1905 1906 if (rctl_rlimit_get(rc, p, &cur_rl) == -1) 1907 return (EINVAL); 1908 1909 /* 1910 * If we are not privileged, we can only lower the hard limit. 1911 */ 1912 if ((rlp64->rlim_max > cur_rl.rlim_max) && 1913 cur_rl.rlim_max != RLIM64_INFINITY && 1914 secpolicy_resource(cr) != 0) 1915 return (EPERM); 1916 1917 mutex_enter(&rset->rcs_lock); 1918 1919 if (rctl_set_find(rset, rc, &rctl) == -1) { 1920 mutex_exit(&rset->rcs_lock); 1921 return (EINVAL); 1922 } 1923 1924 rval_priv = rctl_gp_detach_val(ragp); 1925 1926 rval = rctl->rc_values; 1927 1928 while (rval != NULL) { 1929 rctl_val_t *next = rval->rcv_next; 1930 1931 if (rval->rcv_privilege == RCPRIV_SYSTEM) 1932 break; 1933 1934 if ((rval->rcv_privilege == RCPRIV_BASIC) || 1935 (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) == 1936 (flagaction & ~RCTL_LOCAL_ACTION_MASK)) { 1937 if (rctl->rc_cursor == rval) { 1938 rctl->rc_cursor = rval->rcv_next; 1939 rctl_val_list_reset(rctl->rc_cursor); 1940 RCTLOP_SET(rctl, p, &e, rctl_model_value( 1941 rctl->rc_dict_entry, p, 1942 rctl->rc_cursor->rcv_value)); 1943 } 1944 (void) rctl_val_list_delete(&rctl->rc_values, rval); 1945 } 1946 1947 rval = next; 1948 } 1949 1950 rval_priv->rcv_privilege = RCPRIV_PRIVILEGED; 1951 rval_priv->rcv_flagaction = flagaction; 1952 if (rlp64->rlim_max == RLIM64_INFINITY) { 1953 rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL; 1954 max = rctl->rc_dict_entry->rcd_max_native; 1955 } else { 1956 max = rlp64->rlim_max; 1957 } 1958 rval_priv->rcv_value = max; 1959 rval_priv->rcv_action_signal = signal; 1960 rval_priv->rcv_action_recipient = NULL; 1961 rval_priv->rcv_action_recip_pid = -1; 1962 rval_priv->rcv_firing_time = 0; 1963 rval_priv->rcv_prev = rval_priv->rcv_next = NULL; 1964 1965 (void) rctl_val_list_insert(&rctl->rc_values, rval_priv); 1966 rctl->rc_cursor = rval_priv; 1967 rctl_val_list_reset(rctl->rc_cursor); 1968 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 1969 rctl->rc_cursor->rcv_value)); 1970 1971 if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) { 1972 rval_basic = rctl_gp_detach_val(ragp); 1973 1974 rval_basic->rcv_privilege = RCPRIV_BASIC; 1975 rval_basic->rcv_value = rlp64->rlim_cur; 1976 rval_basic->rcv_flagaction = flagaction; 1977 rval_basic->rcv_action_signal = signal; 1978 rval_basic->rcv_action_recipient = p; 1979 rval_basic->rcv_action_recip_pid = p->p_pid; 1980 rval_basic->rcv_firing_time = 0; 1981 rval_basic->rcv_prev = rval_basic->rcv_next = NULL; 1982 1983 (void) rctl_val_list_insert(&rctl->rc_values, rval_basic); 1984 rctl->rc_cursor = rval_basic; 1985 rctl_val_list_reset(rctl->rc_cursor); 1986 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 1987 rctl->rc_cursor->rcv_value)); 1988 } 1989 1990 ASSERT(rctl->rc_cursor != NULL); 1991 1992 mutex_exit(&rset->rcs_lock); 1993 return (0); 1994 } 1995 1996 1997 /* 1998 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t, 1999 * rlim64_t, rctl_ops_t *) 2000 * 2001 * Overview 2002 * rctl_register() performs a look-up in the dictionary of rctls 2003 * active on the system; if a rctl of that name is absent, an entry is 2004 * made into the dictionary. The rctl is returned with its reference 2005 * count incremented by one. If the rctl name already exists, we panic. 2006 * (Were the resource control system to support dynamic loading and unloading, 2007 * which it is structured for, duplicate registration should lead to load 2008 * failure instead of panicking.) 2009 * 2010 * Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be 2011 * defined. This limit contains the highest possible value for this quantity 2012 * on the system. Furthermore, the registered control must provide infinite 2013 * values for all applicable address space models supported by the operating 2014 * system. Attempts to set resource control values beyond the system limit 2015 * will fail. 2016 * 2017 * Return values 2018 * The rctl's ID. 2019 * 2020 * Caller's context 2021 * Caller must be in a context suitable for KM_SLEEP allocations. 2022 */ 2023 rctl_hndl_t 2024 rctl_register( 2025 const char *name, 2026 rctl_entity_t entity, 2027 int global_flags, 2028 rlim64_t max_native, 2029 rlim64_t max_ilp32, 2030 rctl_ops_t *ops) 2031 { 2032 rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 2033 rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 2034 rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t), 2035 KM_SLEEP); 2036 rctl_t *old_rctl; 2037 rctl_hndl_t rhndl; 2038 int localflags; 2039 2040 ASSERT(ops != NULL); 2041 2042 bzero(rctl, sizeof (rctl_t)); 2043 bzero(rctl_val, sizeof (rctl_val_t)); 2044 2045 if (global_flags & RCTL_GLOBAL_DENY_NEVER) 2046 localflags = RCTL_LOCAL_MAXIMAL; 2047 else 2048 localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY; 2049 2050 rctl_val->rcv_privilege = RCPRIV_SYSTEM; 2051 rctl_val->rcv_value = max_native; 2052 rctl_val->rcv_flagaction = localflags; 2053 rctl_val->rcv_action_signal = 0; 2054 rctl_val->rcv_action_recipient = NULL; 2055 rctl_val->rcv_action_recip_pid = -1; 2056 rctl_val->rcv_firing_time = 0; 2057 rctl_val->rcv_next = NULL; 2058 rctl_val->rcv_prev = NULL; 2059 2060 rctl_de->rcd_name = (char *)name; 2061 rctl_de->rcd_default_value = rctl_val; 2062 rctl_de->rcd_max_native = max_native; 2063 rctl_de->rcd_max_ilp32 = max_ilp32; 2064 rctl_de->rcd_entity = entity; 2065 rctl_de->rcd_ops = ops; 2066 rctl_de->rcd_flagaction = global_flags; 2067 2068 rctl->rc_dict_entry = rctl_de; 2069 rctl->rc_values = rctl_val; 2070 2071 /* 2072 * 1. Take global lock, validate nonexistence of name, get ID. 2073 */ 2074 mutex_enter(&rctl_dict_lock); 2075 2076 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 2077 (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND) 2078 panic("duplicate registration of rctl %s", name); 2079 2080 rhndl = rctl_de->rcd_id = rctl->rc_id = 2081 (rctl_hndl_t)id_alloc(rctl_ids); 2082 2083 /* 2084 * 2. Insert name-entry pair in rctl_dict_by_name. 2085 */ 2086 if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name, 2087 (mod_hash_val_t)rctl_de)) 2088 panic("unable to insert rctl dict entry for %s (%u)", name, 2089 (uint_t)rctl->rc_id); 2090 2091 /* 2092 * 3. Insert ID-rctl_t * pair in rctl_dict. 2093 */ 2094 if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2095 (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND) 2096 panic("duplicate rctl ID %u registered", rctl->rc_id); 2097 2098 if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2099 (mod_hash_val_t)rctl)) 2100 panic("unable to insert rctl %s/%u (%p)", name, 2101 (uint_t)rctl->rc_id, rctl); 2102 2103 /* 2104 * 3a. Insert rctl_dict_entry_t * in appropriate entity list. 2105 */ 2106 2107 mutex_enter(&rctl_lists_lock); 2108 2109 switch (entity) { 2110 case RCENTITY_ZONE: 2111 case RCENTITY_PROJECT: 2112 case RCENTITY_TASK: 2113 case RCENTITY_PROCESS: 2114 rctl_de->rcd_next = rctl_lists[entity]; 2115 rctl_lists[entity] = rctl_de; 2116 break; 2117 default: 2118 panic("registering unknown rctl entity %d (%s)", entity, 2119 name); 2120 break; 2121 } 2122 2123 mutex_exit(&rctl_lists_lock); 2124 2125 /* 2126 * 4. Drop lock. 2127 */ 2128 mutex_exit(&rctl_dict_lock); 2129 2130 return (rhndl); 2131 } 2132 2133 /* 2134 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, 2135 * rctl_val_t *v) 2136 * 2137 * Overview 2138 * rctl_global_action() takes, in according with the flags on the rctl_dict 2139 * entry for the given control, the appropriate actions on the exceeded 2140 * control value. Additionally, rctl_global_action() updates the firing time 2141 * on the exceeded value. 2142 * 2143 * Return values 2144 * A bitmask reflecting the actions actually taken. 2145 * 2146 * Caller's context 2147 * No restrictions on context. 2148 */ 2149 /*ARGSUSED*/ 2150 static int 2151 rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v) 2152 { 2153 rctl_dict_entry_t *rde = r->rc_dict_entry; 2154 const char *pr, *en, *idstr; 2155 id_t id; 2156 enum { 2157 SUFFIX_NONE, /* id consumed directly */ 2158 SUFFIX_NUMERIC, /* id consumed in suffix */ 2159 SUFFIX_STRING /* idstr consumed in suffix */ 2160 } suffix = SUFFIX_NONE; 2161 int ret = 0; 2162 2163 v->rcv_firing_time = gethrtime(); 2164 2165 switch (v->rcv_privilege) { 2166 case RCPRIV_BASIC: 2167 pr = "basic"; 2168 break; 2169 case RCPRIV_PRIVILEGED: 2170 pr = "privileged"; 2171 break; 2172 case RCPRIV_SYSTEM: 2173 pr = "system"; 2174 break; 2175 default: 2176 pr = "unknown"; 2177 break; 2178 } 2179 2180 switch (rde->rcd_entity) { 2181 case RCENTITY_PROCESS: 2182 en = "process"; 2183 id = p->p_pid; 2184 suffix = SUFFIX_NONE; 2185 break; 2186 case RCENTITY_TASK: 2187 en = "task"; 2188 id = p->p_task->tk_tkid; 2189 suffix = SUFFIX_NUMERIC; 2190 break; 2191 case RCENTITY_PROJECT: 2192 en = "project"; 2193 id = p->p_task->tk_proj->kpj_id; 2194 suffix = SUFFIX_NUMERIC; 2195 break; 2196 case RCENTITY_ZONE: 2197 en = "zone"; 2198 idstr = p->p_zone->zone_name; 2199 suffix = SUFFIX_STRING; 2200 break; 2201 default: 2202 en = "unknown entity associated with process"; 2203 id = p->p_pid; 2204 suffix = SUFFIX_NONE; 2205 break; 2206 } 2207 2208 if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) { 2209 switch (suffix) { 2210 default: 2211 case SUFFIX_NONE: 2212 (void) strlog(0, 0, 0, 2213 rde->rcd_strlog_flags | log_global.lz_active, 2214 "%s rctl %s (value %llu) exceeded by %s %d.", 2215 pr, rde->rcd_name, v->rcv_value, en, id); 2216 break; 2217 case SUFFIX_NUMERIC: 2218 (void) strlog(0, 0, 0, 2219 rde->rcd_strlog_flags | log_global.lz_active, 2220 "%s rctl %s (value %llu) exceeded by process %d" 2221 " in %s %d.", 2222 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2223 en, id); 2224 break; 2225 case SUFFIX_STRING: 2226 (void) strlog(0, 0, 0, 2227 rde->rcd_strlog_flags | log_global.lz_active, 2228 "%s rctl %s (value %llu) exceeded by process %d" 2229 " in %s %s.", 2230 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2231 en, idstr); 2232 break; 2233 } 2234 } 2235 2236 if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) 2237 ret |= RCT_DENY; 2238 2239 return (ret); 2240 } 2241 2242 static int 2243 rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v, 2244 uint_t safety) 2245 { 2246 int ret = 0; 2247 sigqueue_t *sqp = NULL; 2248 rctl_dict_entry_t *rde = r->rc_dict_entry; 2249 int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE); 2250 2251 proc_t *recipient = v->rcv_action_recipient; 2252 id_t recip_pid = v->rcv_action_recip_pid; 2253 int recip_signal = v->rcv_action_signal; 2254 uint_t flagaction = v->rcv_flagaction; 2255 2256 if (safety == RCA_UNSAFE_ALL) { 2257 if (flagaction & RCTL_LOCAL_DENY) { 2258 ret |= RCT_DENY; 2259 } 2260 return (ret); 2261 } 2262 2263 if (flagaction & RCTL_LOCAL_SIGNAL) { 2264 /* 2265 * We can build a siginfo only in the case that it is 2266 * safe for us to drop p_lock. (For asynchronous 2267 * checks this is currently not true.) 2268 */ 2269 if (safety == RCA_SAFE) { 2270 mutex_exit(&rset->rcs_lock); 2271 mutex_exit(&p->p_lock); 2272 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 2273 mutex_enter(&p->p_lock); 2274 mutex_enter(&rset->rcs_lock); 2275 2276 sqp->sq_info.si_signo = recip_signal; 2277 sqp->sq_info.si_code = SI_RCTL; 2278 sqp->sq_info.si_errno = 0; 2279 sqp->sq_info.si_entity = (int)rde->rcd_entity; 2280 } 2281 2282 if (recipient == NULL || recipient == p) { 2283 ret |= RCT_SIGNAL; 2284 2285 if (sqp == NULL) { 2286 sigtoproc(p, NULL, recip_signal); 2287 } else if (p == curproc) { 2288 /* 2289 * Then this is a synchronous test and we can 2290 * direct the signal at the violating thread. 2291 */ 2292 sigaddqa(curproc, curthread, sqp); 2293 } else { 2294 sigaddqa(p, NULL, sqp); 2295 } 2296 } else if (!unobservable) { 2297 proc_t *rp; 2298 2299 mutex_exit(&rset->rcs_lock); 2300 mutex_exit(&p->p_lock); 2301 2302 mutex_enter(&pidlock); 2303 if ((rp = prfind(recip_pid)) == recipient) { 2304 /* 2305 * Recipient process is still alive, but may not 2306 * be in this task or project any longer. In 2307 * this case, the recipient's resource control 2308 * set pertinent to this control will have 2309 * changed--and we will not deliver the signal, 2310 * as the recipient process is trying to tear 2311 * itself off of its former set. 2312 */ 2313 mutex_enter(&rp->p_lock); 2314 mutex_exit(&pidlock); 2315 2316 if (rctl_entity_obtain_rset(rde, rp) == rset) { 2317 ret |= RCT_SIGNAL; 2318 2319 if (sqp == NULL) 2320 sigtoproc(rp, NULL, 2321 recip_signal); 2322 else 2323 sigaddqa(rp, NULL, sqp); 2324 } else if (sqp) { 2325 kmem_free(sqp, sizeof (sigqueue_t)); 2326 } 2327 mutex_exit(&rp->p_lock); 2328 } else { 2329 mutex_exit(&pidlock); 2330 if (sqp) 2331 kmem_free(sqp, sizeof (sigqueue_t)); 2332 } 2333 2334 mutex_enter(&p->p_lock); 2335 /* 2336 * Since we dropped p_lock, we may no longer be in the 2337 * same task or project as we were at entry. It is thus 2338 * unsafe for us to reacquire the set lock at this 2339 * point; callers of rctl_local_action() must handle 2340 * this possibility. 2341 */ 2342 ret |= RCT_LK_ABANDONED; 2343 } else if (sqp) { 2344 kmem_free(sqp, sizeof (sigqueue_t)); 2345 } 2346 } 2347 2348 if ((flagaction & RCTL_LOCAL_DENY) && 2349 (recipient == NULL || recipient == p)) { 2350 ret |= RCT_DENY; 2351 } 2352 2353 return (ret); 2354 } 2355 2356 /* 2357 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t) 2358 * 2359 * Overview 2360 * Take the action associated with the enforced value (as defined by 2361 * rctl_get_enforced_value()) being exceeded or encountered. Possibly perform 2362 * a restricted subset of the available actions, if circumstances dictate that 2363 * we cannot safely allocate memory (for a sigqueue_t) or guarantee process 2364 * persistence across the duration of the function (an asynchronous action). 2365 * 2366 * Return values 2367 * Actions taken, according to the rctl_test bitmask. 2368 * 2369 * Caller's context 2370 * Safe to acquire rcs_lock. 2371 */ 2372 int 2373 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety) 2374 { 2375 return (rctl_action_entity(hndl, rset, p, NULL, safety)); 2376 } 2377 2378 int 2379 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, 2380 rctl_entity_p_t *e, uint_t safety) 2381 { 2382 int ret = RCT_NONE; 2383 rctl_t *lrctl; 2384 rctl_entity_p_t e_tmp; 2385 2386 rctl_action_acquire: 2387 mutex_enter(&rset->rcs_lock); 2388 if (rctl_set_find(rset, hndl, &lrctl) == -1) { 2389 mutex_exit(&rset->rcs_lock); 2390 return (ret); 2391 } 2392 2393 if (e == NULL) { 2394 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2395 p, &e_tmp); 2396 e = &e_tmp; 2397 } 2398 2399 if ((ret & RCT_LK_ABANDONED) == 0) { 2400 ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor); 2401 2402 RCTLOP_ACTION(lrctl, p, e); 2403 2404 ret |= rctl_local_action(lrctl, rset, p, 2405 lrctl->rc_cursor, safety); 2406 2407 if (ret & RCT_LK_ABANDONED) 2408 goto rctl_action_acquire; 2409 } 2410 2411 ret &= ~RCT_LK_ABANDONED; 2412 2413 if (!(ret & RCT_DENY) && 2414 lrctl->rc_cursor->rcv_next != NULL) { 2415 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2416 2417 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2418 p, lrctl->rc_cursor->rcv_value)); 2419 2420 } 2421 mutex_exit(&rset->rcs_lock); 2422 2423 return (ret); 2424 } 2425 2426 /* 2427 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t) 2428 * 2429 * Overview 2430 * Increment the resource associated with the given handle, returning zero if 2431 * the incremented value does not exceed the threshold for the current limit 2432 * on the resource. 2433 * 2434 * Return values 2435 * Actions taken, according to the rctl_test bitmask. 2436 * 2437 * Caller's context 2438 * p_lock held by caller. 2439 */ 2440 /*ARGSUSED*/ 2441 int 2442 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2443 rctl_qty_t incr, uint_t flags) 2444 { 2445 return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags)); 2446 } 2447 2448 int 2449 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2450 rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags) 2451 { 2452 rctl_t *lrctl; 2453 int ret = RCT_NONE; 2454 rctl_entity_p_t e_tmp; 2455 if (p == &p0) { 2456 /* 2457 * We don't enforce rctls on the kernel itself. 2458 */ 2459 return (ret); 2460 } 2461 2462 rctl_test_acquire: 2463 ASSERT(MUTEX_HELD(&p->p_lock)); 2464 2465 mutex_enter(&rset->rcs_lock); 2466 2467 /* 2468 * Dereference from rctl_set. We don't enforce newly loaded controls 2469 * that haven't been set on this entity (since the only valid value is 2470 * the infinite system value). 2471 */ 2472 if (rctl_set_find(rset, rhndl, &lrctl) == -1) { 2473 mutex_exit(&rset->rcs_lock); 2474 return (ret); 2475 } 2476 2477 /* 2478 * This control is currently unenforced: maximal value on control 2479 * supporting infinitely available resource. 2480 */ 2481 if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) && 2482 (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) { 2483 2484 mutex_exit(&rset->rcs_lock); 2485 return (ret); 2486 } 2487 2488 /* 2489 * If we have been called by rctl_test, look up the entity pointer 2490 * from the proc pointer. 2491 */ 2492 if (e == NULL) { 2493 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2494 p, &e_tmp); 2495 e = &e_tmp; 2496 } 2497 2498 /* 2499 * Get enforced rctl value and current usage. Test the increment 2500 * with the current usage against the enforced value--take action as 2501 * necessary. 2502 */ 2503 while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) { 2504 if ((ret & RCT_LK_ABANDONED) == 0) { 2505 ret |= rctl_global_action(lrctl, rset, p, 2506 lrctl->rc_cursor); 2507 2508 RCTLOP_ACTION(lrctl, p, e); 2509 2510 ret |= rctl_local_action(lrctl, rset, p, 2511 lrctl->rc_cursor, flags); 2512 2513 if (ret & RCT_LK_ABANDONED) 2514 goto rctl_test_acquire; 2515 } 2516 2517 ret &= ~RCT_LK_ABANDONED; 2518 2519 if ((ret & RCT_DENY) == RCT_DENY || 2520 lrctl->rc_cursor->rcv_next == NULL) { 2521 ret |= RCT_DENY; 2522 break; 2523 } 2524 2525 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2526 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2527 p, lrctl->rc_cursor->rcv_value)); 2528 } 2529 2530 mutex_exit(&rset->rcs_lock); 2531 2532 return (ret); 2533 } 2534 2535 /* 2536 * void rctl_init(void) 2537 * 2538 * Overview 2539 * Initialize the rctl subsystem, including the primoridal rctls 2540 * provided by the system. New subsystem-specific rctls should _not_ be 2541 * initialized here. (Do it in your own file.) 2542 * 2543 * Return values 2544 * None. 2545 * 2546 * Caller's context 2547 * Safe for KM_SLEEP allocations. Must be called prior to any process model 2548 * initialization. 2549 */ 2550 void 2551 rctl_init(void) 2552 { 2553 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2554 0, NULL, NULL, NULL, NULL, NULL, 0); 2555 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2556 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2557 2558 rctl_dict = mod_hash_create_extended("rctl_dict", 2559 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2560 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2561 rctl_dict_by_name = mod_hash_create_strhash( 2562 "rctl_handles_by_name", rctl_dict_size, 2563 mod_hash_null_valdtor); 2564 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2565 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2566 2567 rctlproc_init(); 2568 } 2569 2570 /* 2571 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) 2572 * 2573 * Increments the amount of locked memory on a project, and 2574 * zone. If proj is NULL, the proj and zone of proc_t p is used. If 2575 * chargeproc is non-zero, then the charged amount is cached on p->p_locked_mem 2576 * so that the charge can be migrated when a process changes projects. 2577 * 2578 * Return values 2579 * 0 - success 2580 * EAGAIN - attempting to increment locked memory is denied by one 2581 * or more resource entities. 2582 */ 2583 int 2584 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2585 int chargeproc) 2586 { 2587 kproject_t *projp; 2588 zone_t *zonep; 2589 rctl_entity_p_t e; 2590 int ret = 0; 2591 2592 ASSERT(p != NULL); 2593 ASSERT(MUTEX_HELD(&p->p_lock)); 2594 if (proj != NULL) { 2595 projp = proj; 2596 zonep = zone_find_by_id(projp->kpj_zoneid); 2597 } else { 2598 projp = p->p_task->tk_proj; 2599 zonep = p->p_zone; 2600 } 2601 2602 mutex_enter(&zonep->zone_rctl_lock); 2603 2604 e.rcep_p.proj = projp; 2605 e.rcep_t = RCENTITY_PROJECT; 2606 if (projp->kpj_data.kpd_locked_mem + inc > 2607 projp->kpj_data.kpd_locked_mem_ctl) { 2608 if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, 2609 p, &e, inc, 0) & RCT_DENY) { 2610 ret = EAGAIN; 2611 goto out; 2612 } 2613 } 2614 e.rcep_p.zone = zonep; 2615 e.rcep_t = RCENTITY_ZONE; 2616 if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { 2617 if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, 2618 p, &e, inc, 0) & RCT_DENY) { 2619 ret = EAGAIN; 2620 goto out; 2621 } 2622 } 2623 2624 zonep->zone_locked_mem += inc; 2625 projp->kpj_data.kpd_locked_mem += inc; 2626 if (chargeproc != 0) { 2627 p->p_locked_mem += inc; 2628 } 2629 out: 2630 mutex_exit(&zonep->zone_rctl_lock); 2631 if (proj != NULL) 2632 zone_rele(zonep); 2633 return (ret); 2634 } 2635 2636 /* 2637 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) 2638 * 2639 * Decrements the amount of locked memory on a project and 2640 * zone. If proj is NULL, the proj and zone of proc_t p is used. If 2641 * creditproc is non-zero, then the quantity of locked memory is subtracted 2642 * from p->p_locked_mem. 2643 * 2644 * Return values 2645 * none 2646 */ 2647 void 2648 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2649 int creditproc) 2650 { 2651 kproject_t *projp; 2652 zone_t *zonep; 2653 2654 if (proj != NULL) { 2655 projp = proj; 2656 zonep = zone_find_by_id(projp->kpj_zoneid); 2657 } else { 2658 ASSERT(p != NULL); 2659 ASSERT(MUTEX_HELD(&p->p_lock)); 2660 projp = p->p_task->tk_proj; 2661 zonep = p->p_zone; 2662 } 2663 2664 mutex_enter(&zonep->zone_rctl_lock); 2665 zonep->zone_locked_mem -= inc; 2666 projp->kpj_data.kpd_locked_mem -= inc; 2667 if (creditproc != 0) { 2668 ASSERT(p != NULL); 2669 ASSERT(MUTEX_HELD(&p->p_lock)); 2670 p->p_locked_mem -= inc; 2671 } 2672 mutex_exit(&zonep->zone_rctl_lock); 2673 if (proj != NULL) 2674 zone_rele(zonep); 2675 } 2676