1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/atomic.h> 29 #include <sys/cmn_err.h> 30 #include <sys/id_space.h> 31 #include <sys/kmem.h> 32 #include <sys/kstat.h> 33 #include <sys/log.h> 34 #include <sys/modctl.h> 35 #include <sys/modhash.h> 36 #include <sys/mutex.h> 37 #include <sys/proc.h> 38 #include <sys/procset.h> 39 #include <sys/project.h> 40 #include <sys/resource.h> 41 #include <sys/rctl.h> 42 #include <sys/siginfo.h> 43 #include <sys/strlog.h> 44 #include <sys/systm.h> 45 #include <sys/task.h> 46 #include <sys/types.h> 47 #include <sys/policy.h> 48 #include <sys/zone.h> 49 50 /* 51 * Resource controls (rctls) 52 * 53 * The rctl subsystem provides a mechanism for kernel components to 54 * register their individual resource controls with the system as a whole, 55 * such that those controls can subscribe to specific actions while being 56 * associated with the various process-model entities provided by the kernel: 57 * the process, the task, the project, and the zone. (In principle, only 58 * minor modifications would be required to connect the resource control 59 * functionality to non-process-model entities associated with the system.) 60 * 61 * Subsystems register their rctls via rctl_register(). Subsystems 62 * also wishing to provide additional limits on a given rctl can modify 63 * them once they have the rctl handle. Each subsystem should store the 64 * handle to their rctl for direct access. 65 * 66 * A primary dictionary, rctl_dict, contains a hash of id to the default 67 * control definition for each controlled resource-entity pair on the system. 68 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 69 * resource control handles. The resource control handles are distributed by 70 * the rctl_ids ID space. The handles are private and not to be 71 * advertised to userland; all userland interactions are via the rctl 72 * names. 73 * 74 * Entities inherit their rctls from their predecessor. Since projects have 75 * no ancestor, they inherit their rctls from the rctl dict for project 76 * rctls. It is expected that project controls will be set to their 77 * appropriate values shortly after project creation, presumably from a 78 * policy source such as the project database. 79 * 80 * Data structures 81 * The rctl_set_t attached to each of the process model entities is a simple 82 * hash table keyed on the rctl handle assigned at registration. The entries 83 * in the hash table are rctl_t's, whose relationship with the active control 84 * values on that resource and with the global state of the resource we 85 * illustrate below: 86 * 87 * rctl_dict[key] --> rctl_dict_entry 88 * ^ 89 * | 90 * +--+---+ 91 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL 92 * +--+---+ ^ 93 * | | 94 * +------- cursor ------+ 95 * 96 * That is, the rctl contains a back pointer to the global resource control 97 * state for this resource, which is also available in the rctl_dict hash 98 * table mentioned earlier. The rctl contains two pointers to resource 99 * control values: one, values, indicates the entire sequence of control 100 * values; the other, cursor, indicates the currently active control 101 * value--the next value to be enforced. The value list itself is an open, 102 * doubly-linked list, the last non-NULL member of which is the system value 103 * for that resource (being the theoretical/conventional maximum allowable 104 * value for the resource on this OS instance). 105 * 106 * Ops Vector 107 * Subsystems publishing rctls need not provide instances of all of the 108 * functions specified by the ops vector. In particular, if general 109 * rctl_*() entry points are not being called, certain functions can be 110 * omitted. These align as follows: 111 * 112 * rctl_set() 113 * You may wish to provide a set callback if locking circumstances prevent 114 * it or if the performance cost of requesting the enforced value from the 115 * resource control is prohibitively expensive. For instance, the currently 116 * enforced file size limit is stored on the process in the p_fsz_ctl to 117 * maintain read()/write() performance. 118 * 119 * rctl_test() 120 * You must provide a test callback if you are using the rctl_test() 121 * interface. An action callback is optional. 122 * 123 * rctl_action() 124 * You may wish to provide an action callback. 125 * 126 * Registration 127 * New resource controls can be added to a running instance by loaded modules 128 * via registration. (The current implementation does not support unloadable 129 * modules; this functionality can be added if needed, via an 130 * activation/deactivation interface involving the manipulation of the 131 * ops vector for the resource control(s) needing to support unloading.) 132 * 133 * Control value ordering 134 * Because the rctl_val chain on each rctl must be navigable in a 135 * deterministic way, we have to define an ordering on the rctl_val_t's. The 136 * defined order is (flags & [maximal], value, flags & [deny-action], 137 * privilege). 138 * 139 * Locking 140 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since 141 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point 142 * of any subsystem, holding subsystem locks, it is at all times inappropriate 143 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks. 144 * Traversing any of the various resource control entity lists requires 145 * holding rctl_lists_lock. 146 * 147 * Each individual resource control set associated with an entity must have 148 * its rcs_lock held for the duration of any operations that would add 149 * resource controls or control values to the set. 150 * 151 * The locking subsequence of interest is: p_lock, rctl_dict_lock, 152 * rctl_lists_lock, entity->rcs_lock. 153 * 154 * The projects(4) database and project entity resource controls 155 * A special case is made for RCENTITY_PROJECT values set through the 156 * setproject(3PROJECT) interface. setproject() makes use of a private 157 * interface, setprojrctl(), which passes through an array of resource control 158 * blocks that need to be set while holding the entity->rcs_lock. This 159 * ensures that the act of modifying a project's resource controls is 160 * "atomic" within the kernel. 161 * 162 * Within the rctl sub-system, we provide two interfaces that are only used by 163 * the setprojrctl() code path - rctl_local_insert_all() and 164 * rctl_local_replace_all(). rctl_local_insert_all() will ensure that the 165 * resource values specified in *new_values are applied. 166 * rctl_local_replace_all() will purge the current rctl->rc_projdb and 167 * rctl->rc_values entries, and apply the *new_values. 168 * 169 * These functions modify not only the linked list of active resource controls 170 * (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of 171 * values set through these interfaces. To clarify: 172 * 173 * rctl->rc_values - a linked list of rctl_val_t. These are the active 174 * resource values associated with this rctl, and may have been set by 175 * setrctl() - via prctl(1M), or by setprojrctl() - via 176 * setproject(3PROJECT). 177 * 178 * rctl->rc_projdb - a linked list of rctl_val_t. These reflect the 179 * resource values set by the setprojrctl() code path. rc_projdb is not 180 * referenced by any other component of the rctl sub-system. 181 * 182 * As various locks are held when calling these functions, we ensure that all 183 * the possible memory allocations are performed prior to calling the 184 * function. *alloc_values is a linked list of uninitialized rctl_val_t, 185 * which may be used to duplicate a new resource control value (passed in as 186 * one of the members of the *new_values linked list), in order to populate 187 * rctl->rc_values. 188 */ 189 190 id_t max_rctl_hndl = 32768; 191 int rctl_dict_size = 64; 192 int rctl_set_size = 8; 193 kmutex_t rctl_dict_lock; 194 mod_hash_t *rctl_dict; 195 mod_hash_t *rctl_dict_by_name; 196 id_space_t *rctl_ids; 197 kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */ 198 kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */ 199 200 kmutex_t rctl_lists_lock; 201 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1]; 202 203 /* 204 * Default resource control operations and ops vector 205 * To be used if the particular rcontrol has no specific actions defined, or 206 * if the subsystem providing the control is quiescing (in preparation for 207 * unloading, presumably.) 208 * 209 * Resource controls with callbacks should fill the unused operations with the 210 * appropriate default impotent callback. 211 */ 212 /*ARGSUSED*/ 213 void 214 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e) 215 { 216 } 217 218 /*ARGSUSED*/ 219 rctl_qty_t 220 rcop_no_usage(struct rctl *r, struct proc *p) 221 { 222 return (0); 223 } 224 225 /*ARGSUSED*/ 226 int 227 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l) 228 { 229 return (0); 230 } 231 232 /*ARGSUSED*/ 233 int 234 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 235 struct rctl_val *rv, rctl_qty_t i, uint_t f) 236 { 237 return (0); 238 } 239 240 rctl_ops_t rctl_default_ops = { 241 rcop_no_action, 242 rcop_no_usage, 243 rcop_no_set, 244 rcop_no_test 245 }; 246 247 /* 248 * Default "absolute" resource control operation and ops vector 249 * Useful if there is no usage associated with the 250 * resource control. 251 */ 252 /*ARGSUSED*/ 253 int 254 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 255 struct rctl_val *rv, rctl_qty_t i, uint_t f) 256 { 257 return (i > rv->rcv_value); 258 } 259 260 rctl_ops_t rctl_absolute_ops = { 261 rcop_no_action, 262 rcop_no_usage, 263 rcop_no_set, 264 rcop_absolute_test 265 }; 266 267 /*ARGSUSED*/ 268 static uint_t 269 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 270 { 271 return ((uint_t)(uintptr_t)key % rctl_dict_size); 272 } 273 274 static int 275 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 276 { 277 uint_t u1 = (uint_t)(uintptr_t)key1; 278 uint_t u2 = (uint_t)(uintptr_t)key2; 279 280 if (u1 > u2) 281 return (1); 282 283 if (u1 == u2) 284 return (0); 285 286 return (-1); 287 } 288 289 static void 290 rctl_dict_val_dtor(mod_hash_val_t val) 291 { 292 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 293 294 kmem_free(kr, sizeof (rctl_dict_entry_t)); 295 } 296 297 /* 298 * size_t rctl_build_name_buf() 299 * 300 * Overview 301 * rctl_build_name_buf() walks all active resource controls in the dictionary, 302 * building a buffer of continguous NUL-terminated strings. 303 * 304 * Return values 305 * The size of the buffer is returned, the passed pointer's contents are 306 * modified to that of the location of the buffer. 307 * 308 * Caller's context 309 * Caller must be in a context suitable for KM_SLEEP allocations. 310 */ 311 size_t 312 rctl_build_name_buf(char **rbufp) 313 { 314 size_t req_size, cpy_size; 315 char *rbufloc; 316 int i; 317 318 rctl_rebuild_name_buf: 319 req_size = cpy_size = 0; 320 321 /* 322 * Calculate needed buffer length. 323 */ 324 mutex_enter(&rctl_lists_lock); 325 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 326 rctl_dict_entry_t *rde; 327 328 for (rde = rctl_lists[i]; 329 rde != NULL; 330 rde = rde->rcd_next) 331 req_size += strlen(rde->rcd_name) + 1; 332 } 333 mutex_exit(&rctl_lists_lock); 334 335 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 336 337 /* 338 * Copy rctl names into our buffer. If the copy length exceeds the 339 * allocate length (due to registration changes), stop copying, free the 340 * buffer, and start again. 341 */ 342 mutex_enter(&rctl_lists_lock); 343 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 344 rctl_dict_entry_t *rde; 345 346 for (rde = rctl_lists[i]; 347 rde != NULL; 348 rde = rde->rcd_next) { 349 size_t length = strlen(rde->rcd_name) + 1; 350 351 cpy_size += length; 352 353 if (cpy_size > req_size) { 354 kmem_free(*rbufp, req_size); 355 mutex_exit(&rctl_lists_lock); 356 goto rctl_rebuild_name_buf; 357 } 358 359 bcopy(rde->rcd_name, rbufloc, length); 360 rbufloc += length; 361 } 362 } 363 mutex_exit(&rctl_lists_lock); 364 365 return (req_size); 366 } 367 368 /* 369 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 370 * 371 * Overview 372 * rctl_dict_lookup() returns the resource control dictionary entry for the 373 * named resource control. 374 * 375 * Return values 376 * A pointer to the appropriate resource control dictionary entry, or NULL if 377 * no such named entry exists. 378 * 379 * Caller's context 380 * Caller must not be holding rctl_dict_lock. 381 */ 382 rctl_dict_entry_t * 383 rctl_dict_lookup(const char *name) 384 { 385 rctl_dict_entry_t *rde; 386 387 mutex_enter(&rctl_dict_lock); 388 389 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 390 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 391 mutex_exit(&rctl_dict_lock); 392 return (NULL); 393 } 394 395 mutex_exit(&rctl_dict_lock); 396 397 return (rde); 398 } 399 400 /* 401 * rctl_hndl_t rctl_hndl_lookup(const char *) 402 * 403 * Overview 404 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 405 * named resource control. 406 * 407 * Return values 408 * The appropriate id, or -1 if no such named entry exists. 409 * 410 * Caller's context 411 * Caller must not be holding rctl_dict_lock. 412 */ 413 rctl_hndl_t 414 rctl_hndl_lookup(const char *name) 415 { 416 rctl_dict_entry_t *rde; 417 418 if ((rde = rctl_dict_lookup(name)) == NULL) 419 return (-1); 420 421 return (rde->rcd_id); 422 } 423 424 /* 425 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 426 * 427 * Overview 428 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 429 * the resource control dictionary entry matching a given resource control id. 430 * 431 * Return values 432 * A pointer to the matching resource control dictionary entry, or NULL if the 433 * id does not match any existing entries. 434 * 435 * Caller's context 436 * Caller must not be holding rctl_lists_lock. 437 */ 438 rctl_dict_entry_t * 439 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 440 { 441 uint_t i; 442 443 mutex_enter(&rctl_lists_lock); 444 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 445 rctl_dict_entry_t *rde; 446 447 for (rde = rctl_lists[i]; 448 rde != NULL; 449 rde = rde->rcd_next) 450 if (rde->rcd_id == hndl) { 451 mutex_exit(&rctl_lists_lock); 452 return (rde); 453 } 454 } 455 mutex_exit(&rctl_lists_lock); 456 457 return (NULL); 458 } 459 460 /* 461 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 462 * rctl_priv_t privilege, uint_t action) 463 * 464 * Overview 465 * Create a default limit with specified value, privilege, and action. 466 * 467 * Return value 468 * No value returned. 469 */ 470 void 471 rctl_add_default_limit(const char *name, rctl_qty_t value, 472 rctl_priv_t privilege, uint_t action) 473 { 474 rctl_val_t *dval; 475 rctl_dict_entry_t *rde; 476 477 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 478 bzero(dval, sizeof (rctl_val_t)); 479 dval->rcv_value = value; 480 dval->rcv_privilege = privilege; 481 dval->rcv_flagaction = action; 482 dval->rcv_action_recip_pid = -1; 483 484 rde = rctl_dict_lookup(name); 485 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 486 } 487 488 /* 489 * void rctl_add_legacy_limit(const char *name, const char *mname, 490 * const char *lname, rctl_qty_t dflt) 491 * 492 * Overview 493 * Create a default privileged limit, using the value obtained from 494 * /etc/system if it exists and is greater than the specified default 495 * value. Exists primarily for System V IPC. 496 * 497 * Return value 498 * No value returned. 499 */ 500 void 501 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 502 rctl_qty_t dflt, rctl_qty_t max) 503 { 504 rctl_qty_t qty; 505 506 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 507 qty = dflt; 508 509 if (qty > max) 510 qty = max; 511 512 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 513 } 514 515 static rctl_set_t * 516 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 517 { 518 rctl_set_t *rset = NULL; 519 520 if (rcd == NULL) 521 return (NULL); 522 523 switch (rcd->rcd_entity) { 524 case RCENTITY_PROCESS: 525 rset = p->p_rctls; 526 break; 527 case RCENTITY_TASK: 528 ASSERT(MUTEX_HELD(&p->p_lock)); 529 if (p->p_task != NULL) 530 rset = p->p_task->tk_rctls; 531 break; 532 case RCENTITY_PROJECT: 533 ASSERT(MUTEX_HELD(&p->p_lock)); 534 if (p->p_task != NULL && 535 p->p_task->tk_proj != NULL) 536 rset = p->p_task->tk_proj->kpj_rctls; 537 break; 538 case RCENTITY_ZONE: 539 ASSERT(MUTEX_HELD(&p->p_lock)); 540 if (p->p_zone != NULL) 541 rset = p->p_zone->zone_rctls; 542 break; 543 default: 544 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 545 break; 546 } 547 548 return (rset); 549 } 550 551 static void 552 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 553 rctl_entity_p_t *e) 554 { 555 e->rcep_p.proc = NULL; 556 e->rcep_t = entity; 557 558 switch (entity) { 559 case RCENTITY_PROCESS: 560 e->rcep_p.proc = p; 561 break; 562 case RCENTITY_TASK: 563 ASSERT(MUTEX_HELD(&p->p_lock)); 564 if (p->p_task != NULL) 565 e->rcep_p.task = p->p_task; 566 break; 567 case RCENTITY_PROJECT: 568 ASSERT(MUTEX_HELD(&p->p_lock)); 569 if (p->p_task != NULL && 570 p->p_task->tk_proj != NULL) 571 e->rcep_p.proj = p->p_task->tk_proj; 572 break; 573 case RCENTITY_ZONE: 574 ASSERT(MUTEX_HELD(&p->p_lock)); 575 if (p->p_zone != NULL) 576 e->rcep_p.zone = p->p_zone; 577 break; 578 default: 579 panic("unknown rctl entity type %d seen", entity); 580 break; 581 } 582 } 583 584 static void 585 rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 586 { 587 uint_t i; 588 589 if (rcgp->rcag_nctls > 0) { 590 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 591 rctl_t *rctl = prev; 592 593 rcgp->rcag_ctls = prev; 594 595 for (i = 1; i < rcgp->rcag_nctls; i++) { 596 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 597 prev->rc_next = rctl; 598 prev = rctl; 599 } 600 601 rctl->rc_next = NULL; 602 } 603 604 if (rcgp->rcag_nvals > 0) { 605 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 606 rctl_val_t *rval = prev; 607 608 rcgp->rcag_vals = prev; 609 610 for (i = 1; i < rcgp->rcag_nvals; i++) { 611 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 612 prev->rcv_next = rval; 613 prev = rval; 614 } 615 616 rval->rcv_next = NULL; 617 } 618 619 } 620 621 static rctl_val_t * 622 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 623 { 624 rctl_val_t *rval = rcgp->rcag_vals; 625 626 ASSERT(rcgp->rcag_nvals > 0); 627 rcgp->rcag_nvals--; 628 rcgp->rcag_vals = rval->rcv_next; 629 630 rval->rcv_next = NULL; 631 632 return (rval); 633 } 634 635 static rctl_t * 636 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 637 { 638 rctl_t *rctl = rcgp->rcag_ctls; 639 640 ASSERT(rcgp->rcag_nctls > 0); 641 rcgp->rcag_nctls--; 642 rcgp->rcag_ctls = rctl->rc_next; 643 644 rctl->rc_next = NULL; 645 646 return (rctl); 647 648 } 649 650 static void 651 rctl_gp_free(rctl_alloc_gp_t *rcgp) 652 { 653 rctl_val_t *rval = rcgp->rcag_vals; 654 rctl_t *rctl = rcgp->rcag_ctls; 655 656 while (rval != NULL) { 657 rctl_val_t *next = rval->rcv_next; 658 659 kmem_cache_free(rctl_val_cache, rval); 660 rval = next; 661 } 662 663 while (rctl != NULL) { 664 rctl_t *next = rctl->rc_next; 665 666 kmem_cache_free(rctl_cache, rctl); 667 rctl = next; 668 } 669 } 670 671 /* 672 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 673 * 674 * Overview 675 * Release all unused memory allocated via one of the "prealloc" functions: 676 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 677 * 678 * Return values 679 * None. 680 * 681 * Caller's context 682 * No restrictions on context. 683 */ 684 void 685 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 686 { 687 rctl_gp_free(gp); 688 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 689 } 690 691 /* 692 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 693 * 694 * Overview 695 * This function defines an ordering to rctl_val_t's in order to allow 696 * for correct placement in value lists. When the imprecise flag is set, 697 * the action recipient is ignored. This is to facilitate insert, 698 * delete, and replace operations by rctlsys. 699 * 700 * Return values 701 * 0 if the val_t's are are considered identical 702 * -1 if a is ordered lower than b 703 * 1 if a is lowered higher than b 704 * 705 * Caller's context 706 * No restrictions on context. 707 */ 708 int 709 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 710 { 711 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 712 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 713 return (-1); 714 715 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 716 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 717 return (1); 718 719 if (a->rcv_value < b->rcv_value) 720 return (-1); 721 722 if (a->rcv_value > b->rcv_value) 723 return (1); 724 725 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 726 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 727 return (-1); 728 729 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 730 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 731 return (1); 732 733 if (a->rcv_privilege < b->rcv_privilege) 734 return (-1); 735 736 if (a->rcv_privilege > b->rcv_privilege) 737 return (1); 738 739 if (imprecise) 740 return (0); 741 742 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 743 return (-1); 744 745 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 746 return (1); 747 748 return (0); 749 } 750 751 static rctl_val_t * 752 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 753 { 754 rctl_val_t *rval = *head; 755 756 while (rval != NULL) { 757 if (rctl_val_cmp(cval, rval, 0) == 0) 758 return (rval); 759 760 rval = rval->rcv_next; 761 } 762 763 return (NULL); 764 765 } 766 767 /* 768 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 769 * 770 * Overview 771 * This function inserts the rctl_val_t into the value list provided. 772 * The insert is always successful unless if the value is a duplicate 773 * of one already in the list. 774 * 775 * Return values 776 * 1 if the value was a duplicate of an existing value in the list. 777 * 0 if the insert was successful. 778 */ 779 int 780 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 781 { 782 rctl_val_t *prev; 783 int equiv; 784 785 rval->rcv_next = NULL; 786 rval->rcv_prev = NULL; 787 788 if (*root == NULL) { 789 *root = rval; 790 return (0); 791 } 792 793 equiv = rctl_val_cmp(rval, *root, 0); 794 795 if (equiv == 0) 796 return (1); 797 798 if (equiv < 0) { 799 rval->rcv_next = *root; 800 rval->rcv_next->rcv_prev = rval; 801 *root = rval; 802 803 return (0); 804 } 805 806 prev = *root; 807 while (prev->rcv_next != NULL && 808 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 809 prev = prev->rcv_next; 810 } 811 812 if (equiv == 0) 813 return (1); 814 815 rval->rcv_next = prev->rcv_next; 816 if (rval->rcv_next != NULL) 817 rval->rcv_next->rcv_prev = rval; 818 prev->rcv_next = rval; 819 rval->rcv_prev = prev; 820 821 return (0); 822 } 823 824 static int 825 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 826 { 827 rctl_val_t *prev; 828 829 if (*root == NULL) 830 return (-1); 831 832 prev = *root; 833 if (rctl_val_cmp(rval, prev, 0) == 0) { 834 *root = prev->rcv_next; 835 (*root)->rcv_prev = NULL; 836 837 kmem_cache_free(rctl_val_cache, prev); 838 839 return (0); 840 } 841 842 while (prev->rcv_next != NULL && 843 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 844 prev = prev->rcv_next; 845 } 846 847 if (prev->rcv_next == NULL) { 848 /* 849 * If we navigate the entire list and cannot find a match, then 850 * return failure. 851 */ 852 return (-1); 853 } 854 855 prev = prev->rcv_next; 856 prev->rcv_prev->rcv_next = prev->rcv_next; 857 if (prev->rcv_next != NULL) 858 prev->rcv_next->rcv_prev = prev->rcv_prev; 859 860 kmem_cache_free(rctl_val_cache, prev); 861 862 return (0); 863 } 864 865 static rctl_val_t * 866 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 867 struct proc *newp) 868 { 869 rctl_val_t *head = NULL; 870 871 for (; rval != NULL; rval = rval->rcv_next) { 872 rctl_val_t *dval = rctl_gp_detach_val(ragp); 873 874 bcopy(rval, dval, sizeof (rctl_val_t)); 875 dval->rcv_prev = dval->rcv_next = NULL; 876 877 if (oldp == NULL || 878 rval->rcv_action_recipient == NULL || 879 rval->rcv_action_recipient == oldp) { 880 if (rval->rcv_privilege == RCPRIV_BASIC) { 881 dval->rcv_action_recipient = newp; 882 dval->rcv_action_recip_pid = newp->p_pid; 883 } else { 884 dval->rcv_action_recipient = NULL; 885 dval->rcv_action_recip_pid = -1; 886 } 887 888 (void) rctl_val_list_insert(&head, dval); 889 } else { 890 kmem_cache_free(rctl_val_cache, dval); 891 } 892 } 893 894 return (head); 895 } 896 897 static void 898 rctl_val_list_reset(rctl_val_t *rval) 899 { 900 for (; rval != NULL; rval = rval->rcv_next) 901 rval->rcv_firing_time = 0; 902 } 903 904 static uint_t 905 rctl_val_list_count(rctl_val_t *rval) 906 { 907 uint_t n = 0; 908 909 for (; rval != NULL; rval = rval->rcv_next) 910 n++; 911 912 return (n); 913 } 914 915 916 static void 917 rctl_val_list_free(rctl_val_t *rval) 918 { 919 while (rval != NULL) { 920 rctl_val_t *next = rval->rcv_next; 921 922 kmem_cache_free(rctl_val_cache, rval); 923 924 rval = next; 925 } 926 } 927 928 /* 929 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 930 * 931 * Overview 932 * In cases where the operating system supports more than one process 933 * addressing model, the operating system capabilities will exceed those of 934 * one or more of these models. Processes in a less capable model must have 935 * their resources accurately controlled, without diluting those of their 936 * descendants reached via exec(). rctl_model_maximum() returns the governing 937 * value for the specified process with respect to a resource control, such 938 * that the value can used for the RCTLOP_SET callback or compatability 939 * support. 940 * 941 * Return values 942 * The maximum value for the given process for the specified resource control. 943 * 944 * Caller's context 945 * No restrictions on context. 946 */ 947 rctl_qty_t 948 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 949 { 950 if (p->p_model == DATAMODEL_NATIVE) 951 return (rde->rcd_max_native); 952 953 return (rde->rcd_max_ilp32); 954 } 955 956 /* 957 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 958 * 959 * Overview 960 * Convenience function wrapping the rctl_model_maximum() functionality. 961 * 962 * Return values 963 * The lesser of the process's maximum value and the given value for the 964 * specified resource control. 965 * 966 * Caller's context 967 * No restrictions on context. 968 */ 969 rctl_qty_t 970 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 971 { 972 rctl_qty_t max = rctl_model_maximum(rde, p); 973 974 return (value < max ? value : max); 975 } 976 977 static void 978 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 979 { 980 uint_t index = hndl % rctl_set_size; 981 rctl_t *next_ctl, *prev_ctl; 982 983 ASSERT(MUTEX_HELD(&set->rcs_lock)); 984 985 rctl->rc_next = NULL; 986 987 if (set->rcs_ctls[index] == NULL) { 988 set->rcs_ctls[index] = rctl; 989 return; 990 } 991 992 if (hndl < set->rcs_ctls[index]->rc_id) { 993 rctl->rc_next = set->rcs_ctls[index]; 994 set->rcs_ctls[index] = rctl; 995 996 return; 997 } 998 999 for (next_ctl = set->rcs_ctls[index]->rc_next, 1000 prev_ctl = set->rcs_ctls[index]; 1001 next_ctl != NULL; 1002 prev_ctl = next_ctl, 1003 next_ctl = next_ctl->rc_next) { 1004 if (next_ctl->rc_id > hndl) { 1005 rctl->rc_next = next_ctl; 1006 prev_ctl->rc_next = rctl; 1007 1008 return; 1009 } 1010 } 1011 1012 rctl->rc_next = next_ctl; 1013 prev_ctl->rc_next = rctl; 1014 } 1015 1016 /* 1017 * rctl_set_t *rctl_set_create() 1018 * 1019 * Overview 1020 * Create an empty resource control set, suitable for attaching to a 1021 * controlled entity. 1022 * 1023 * Return values 1024 * A pointer to the newly created set. 1025 * 1026 * Caller's context 1027 * Safe for KM_SLEEP allocations. 1028 */ 1029 rctl_set_t * 1030 rctl_set_create() 1031 { 1032 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 1033 1034 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 1035 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1036 KM_SLEEP); 1037 rset->rcs_entity = -1; 1038 1039 return (rset); 1040 } 1041 1042 /* 1043 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1044 * 1045 * Overview 1046 * rctl_set_init_prealloc() examines the globally defined resource controls 1047 * and their default values and returns a resource control allocation group 1048 * populated with sufficient controls and values to form a representative 1049 * resource control set for the specified entity. 1050 * 1051 * Return values 1052 * A pointer to the newly created allocation group. 1053 * 1054 * Caller's context 1055 * Caller must be in a context suitable for KM_SLEEP allocations. 1056 */ 1057 rctl_alloc_gp_t * 1058 rctl_set_init_prealloc(rctl_entity_t entity) 1059 { 1060 rctl_dict_entry_t *rde; 1061 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1062 1063 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1064 1065 if (rctl_lists[entity] == NULL) 1066 return (ragp); 1067 1068 mutex_enter(&rctl_lists_lock); 1069 1070 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1071 ragp->rcag_nctls++; 1072 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1073 } 1074 1075 mutex_exit(&rctl_lists_lock); 1076 1077 rctl_gp_alloc(ragp); 1078 1079 return (ragp); 1080 } 1081 1082 /* 1083 * rctl_set_t *rctl_set_init(rctl_entity_t) 1084 * 1085 * Overview 1086 * rctl_set_create() creates a resource control set, initialized with the 1087 * system infinite values on all registered controls, for attachment to a 1088 * system entity requiring resource controls, such as a process or a task. 1089 * 1090 * Return values 1091 * A pointer to the newly filled set. 1092 * 1093 * Caller's context 1094 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1095 * may modify task and project members based on the proc structure 1096 * they are passed. 1097 */ 1098 rctl_set_t * 1099 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1100 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1101 { 1102 rctl_dict_entry_t *rde; 1103 1104 ASSERT(MUTEX_HELD(&p->p_lock)); 1105 ASSERT(e); 1106 rset->rcs_entity = entity; 1107 1108 if (rctl_lists[entity] == NULL) 1109 return (rset); 1110 1111 mutex_enter(&rctl_lists_lock); 1112 mutex_enter(&rset->rcs_lock); 1113 1114 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1115 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1116 1117 rctl->rc_dict_entry = rde; 1118 rctl->rc_id = rde->rcd_id; 1119 rctl->rc_projdb = NULL; 1120 1121 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1122 ragp, NULL, p); 1123 rctl->rc_cursor = rctl->rc_values; 1124 1125 ASSERT(rctl->rc_cursor != NULL); 1126 1127 rctl_set_insert(rset, rde->rcd_id, rctl); 1128 1129 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1130 rctl->rc_cursor->rcv_value)); 1131 } 1132 1133 mutex_exit(&rset->rcs_lock); 1134 mutex_exit(&rctl_lists_lock); 1135 1136 return (rset); 1137 } 1138 1139 static rctl_t * 1140 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1141 struct proc *newp) 1142 { 1143 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1144 rctl_val_t *dval; 1145 1146 dup->rc_id = rctl->rc_id; 1147 dup->rc_dict_entry = rctl->rc_dict_entry; 1148 dup->rc_next = NULL; 1149 dup->rc_cursor = NULL; 1150 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1151 1152 for (dval = dup->rc_values; 1153 dval != NULL; dval = dval->rcv_next) { 1154 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1155 dup->rc_cursor = dval; 1156 break; 1157 } 1158 } 1159 1160 if (dup->rc_cursor == NULL) 1161 dup->rc_cursor = dup->rc_values; 1162 1163 return (dup); 1164 } 1165 1166 static void 1167 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1168 { 1169 uint_t i; 1170 1171 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1172 1173 for (i = 0; i < rctl_set_size; i++) { 1174 rctl_t *r = set->rcs_ctls[i]; 1175 1176 while (r != NULL) { 1177 ragp->rcag_nctls++; 1178 1179 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1180 1181 r = r->rc_next; 1182 } 1183 } 1184 } 1185 1186 /* 1187 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1188 * 1189 * Overview 1190 * Given a resource control set, allocate a sufficiently large allocation 1191 * group to contain a duplicate of the set. 1192 * 1193 * Return value 1194 * A pointer to the newly created allocation group. 1195 * 1196 * Caller's context 1197 * Safe for KM_SLEEP allocations. 1198 */ 1199 rctl_alloc_gp_t * 1200 rctl_set_dup_prealloc(rctl_set_t *set) 1201 { 1202 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1203 1204 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1205 1206 mutex_enter(&set->rcs_lock); 1207 rctl_set_fill_alloc_gp(set, ragp); 1208 mutex_exit(&set->rcs_lock); 1209 1210 rctl_gp_alloc(ragp); 1211 1212 return (ragp); 1213 } 1214 1215 /* 1216 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1217 * 1218 * Overview 1219 * Verify that the allocation group provided is large enough to allow a 1220 * duplicate of the given resource control set to be constructed from its 1221 * contents. 1222 * 1223 * Return values 1224 * 1 if the allocation group is sufficiently large, 0 otherwise. 1225 * 1226 * Caller's context 1227 * rcs_lock must be held prior to entry. 1228 */ 1229 int 1230 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1231 { 1232 rctl_alloc_gp_t curr_gp; 1233 1234 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1235 1236 rctl_set_fill_alloc_gp(set, &curr_gp); 1237 1238 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1239 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1240 return (1); 1241 1242 return (0); 1243 } 1244 1245 /* 1246 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1247 * rctl_set_t *, rctl_alloc_gp_t *, int) 1248 * 1249 * Overview 1250 * Make a duplicate of the resource control set. The proc pointers are those 1251 * of the owning process and of the process associated with the entity 1252 * receiving the duplicate. 1253 * 1254 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1255 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1256 * Stage 2 consists of copying all rctls and values from the old set into 1257 * the new. Stage 3 completes the duplication by performing the appropriate 1258 * callbacks for each rctl in the new set. 1259 * 1260 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1261 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1262 * be supplied if the newp proc structure reflects the new task and 1263 * project linkage. 1264 * 1265 * Return value 1266 * A pointer to the duplicate set. 1267 * 1268 * Caller's context 1269 * The rcs_lock of the set to be duplicated must be held prior to entry. 1270 */ 1271 rctl_set_t * 1272 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1273 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1274 { 1275 uint_t i; 1276 rctl_set_t *iter; 1277 1278 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1279 ASSERT(e); 1280 /* 1281 * When copying the old set, iterate over that. Otherwise, when 1282 * only callbacks have been requested, iterate over the dup set. 1283 */ 1284 if (flag & RCD_DUP) { 1285 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1286 iter = set; 1287 dup->rcs_entity = set->rcs_entity; 1288 } else { 1289 iter = dup; 1290 } 1291 1292 mutex_enter(&dup->rcs_lock); 1293 1294 for (i = 0; i < rctl_set_size; i++) { 1295 rctl_t *r = iter->rcs_ctls[i]; 1296 rctl_t *d; 1297 1298 while (r != NULL) { 1299 if (flag & RCD_DUP) { 1300 d = rctl_dup(r, ragp, oldp, newp); 1301 rctl_set_insert(dup, r->rc_id, d); 1302 } else { 1303 d = r; 1304 } 1305 1306 if (flag & RCD_CALLBACK) 1307 RCTLOP_SET(d, newp, e, 1308 rctl_model_value(d->rc_dict_entry, newp, 1309 d->rc_cursor->rcv_value)); 1310 1311 r = r->rc_next; 1312 } 1313 } 1314 1315 mutex_exit(&dup->rcs_lock); 1316 1317 return (dup); 1318 } 1319 1320 /* 1321 * void rctl_set_free(rctl_set_t *) 1322 * 1323 * Overview 1324 * Delete resource control set and all attached values. 1325 * 1326 * Return values 1327 * No value returned. 1328 * 1329 * Caller's context 1330 * No restrictions on context. 1331 */ 1332 void 1333 rctl_set_free(rctl_set_t *set) 1334 { 1335 uint_t i; 1336 1337 mutex_enter(&set->rcs_lock); 1338 for (i = 0; i < rctl_set_size; i++) { 1339 rctl_t *r = set->rcs_ctls[i]; 1340 1341 while (r != NULL) { 1342 rctl_val_t *v = r->rc_values; 1343 rctl_t *n = r->rc_next; 1344 1345 kmem_cache_free(rctl_cache, r); 1346 1347 rctl_val_list_free(v); 1348 1349 r = n; 1350 } 1351 } 1352 mutex_exit(&set->rcs_lock); 1353 1354 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1355 kmem_free(set, sizeof (rctl_set_t)); 1356 } 1357 1358 /* 1359 * void rctl_set_reset(rctl_set_t *) 1360 * 1361 * Overview 1362 * Resets all rctls within the set such that the lowest value becomes active. 1363 * 1364 * Return values 1365 * No value returned. 1366 * 1367 * Caller's context 1368 * No restrictions on context. 1369 */ 1370 void 1371 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1372 { 1373 uint_t i; 1374 1375 ASSERT(e); 1376 1377 mutex_enter(&set->rcs_lock); 1378 for (i = 0; i < rctl_set_size; i++) { 1379 rctl_t *r = set->rcs_ctls[i]; 1380 1381 while (r != NULL) { 1382 r->rc_cursor = r->rc_values; 1383 rctl_val_list_reset(r->rc_cursor); 1384 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1385 p, r->rc_cursor->rcv_value)); 1386 1387 ASSERT(r->rc_cursor != NULL); 1388 1389 r = r->rc_next; 1390 } 1391 } 1392 1393 mutex_exit(&set->rcs_lock); 1394 } 1395 1396 /* 1397 * void rctl_set_tearoff(rctl_set *, struct proc *) 1398 * 1399 * Overview 1400 * Tear off any resource control values on this set with an action recipient 1401 * equal to the specified process (as they are becoming invalid with the 1402 * process's departure from this set as an observer). 1403 * 1404 * Return values 1405 * No value returned. 1406 * 1407 * Caller's context 1408 * No restrictions on context 1409 */ 1410 void 1411 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1412 { 1413 uint_t i; 1414 1415 mutex_enter(&set->rcs_lock); 1416 for (i = 0; i < rctl_set_size; i++) { 1417 rctl_t *r = set->rcs_ctls[i]; 1418 1419 while (r != NULL) { 1420 rctl_val_t *rval; 1421 1422 tearoff_rewalk_list: 1423 rval = r->rc_values; 1424 1425 while (rval != NULL) { 1426 if (rval->rcv_privilege == RCPRIV_BASIC && 1427 rval->rcv_action_recipient == p) { 1428 if (r->rc_cursor == rval) 1429 r->rc_cursor = rval->rcv_next; 1430 1431 (void) rctl_val_list_delete( 1432 &r->rc_values, rval); 1433 1434 goto tearoff_rewalk_list; 1435 } 1436 1437 rval = rval->rcv_next; 1438 } 1439 1440 ASSERT(r->rc_cursor != NULL); 1441 1442 r = r->rc_next; 1443 } 1444 } 1445 1446 mutex_exit(&set->rcs_lock); 1447 } 1448 1449 static int 1450 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1451 { 1452 uint_t index = hndl % rctl_set_size; 1453 rctl_t *curr_ctl; 1454 1455 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1456 1457 for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1458 curr_ctl = curr_ctl->rc_next) { 1459 if (curr_ctl->rc_id == hndl) { 1460 *rctl = curr_ctl; 1461 1462 return (0); 1463 } 1464 } 1465 1466 return (-1); 1467 } 1468 1469 /* 1470 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1471 * 1472 * Overview 1473 * Given a process, get the next enforced value on the rctl of the specified 1474 * handle. 1475 * 1476 * Return value 1477 * The enforced value. 1478 * 1479 * Caller's context 1480 * For controls on process collectives, p->p_lock must be held across the 1481 * operation. 1482 */ 1483 /*ARGSUSED*/ 1484 rctl_qty_t 1485 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1486 { 1487 rctl_t *rctl; 1488 rlim64_t ret; 1489 1490 mutex_enter(&rset->rcs_lock); 1491 1492 if (rctl_set_find(rset, hndl, &rctl) == -1) 1493 panic("unknown resource control handle %d requested", hndl); 1494 else 1495 ret = rctl_model_value(rctl->rc_dict_entry, p, 1496 rctl->rc_cursor->rcv_value); 1497 1498 mutex_exit(&rset->rcs_lock); 1499 1500 return (ret); 1501 } 1502 1503 /* 1504 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1505 * 1506 * Overview 1507 * Copy a sanitized version of the global rctl for a given resource control 1508 * name. (By sanitization, we mean that the unsafe data pointers have been 1509 * zeroed.) 1510 * 1511 * Return value 1512 * -1 if name not defined, 0 otherwise. 1513 * 1514 * Caller's context 1515 * No restrictions on context. rctl_dict_lock must not be held. 1516 */ 1517 int 1518 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1519 { 1520 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1521 1522 if (rde == NULL) 1523 return (-1); 1524 1525 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1526 1527 drde->rcd_next = NULL; 1528 drde->rcd_ops = NULL; 1529 1530 return (0); 1531 } 1532 1533 /* 1534 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1535 * 1536 * Overview 1537 * Transfer the settable fields of the named rctl to the global rctl matching 1538 * the given resource control name. 1539 * 1540 * Return value 1541 * -1 if name not defined, 0 otherwise. 1542 * 1543 * Caller's context 1544 * No restrictions on context. rctl_dict_lock must not be held. 1545 */ 1546 int 1547 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1548 { 1549 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1550 1551 if (rde == NULL) 1552 return (-1); 1553 1554 rde->rcd_flagaction = drde->rcd_flagaction; 1555 rde->rcd_syslog_level = drde->rcd_syslog_level; 1556 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1557 1558 return (0); 1559 } 1560 1561 static int 1562 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1563 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1564 rctl_val_t *, rctl_val_t *), struct proc *p) 1565 { 1566 rctl_t *rctl; 1567 rctl_set_t *rset; 1568 rctl_entity_p_t e; 1569 int ret = 0; 1570 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1571 1572 local_op_retry: 1573 1574 ASSERT(MUTEX_HELD(&p->p_lock)); 1575 1576 rset = rctl_entity_obtain_rset(rde, p); 1577 1578 if (rset == NULL) { 1579 return (-1); 1580 } 1581 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1582 1583 mutex_enter(&rset->rcs_lock); 1584 1585 /* using rctl's hndl, get rctl from local set */ 1586 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1587 mutex_exit(&rset->rcs_lock); 1588 return (-1); 1589 } 1590 1591 ret = cbop(hndl, p, &e, rctl, oval, nval); 1592 1593 mutex_exit(&rset->rcs_lock); 1594 return (ret); 1595 } 1596 1597 /*ARGSUSED*/ 1598 static int 1599 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1600 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1601 { 1602 if (oval == NULL) { 1603 /* 1604 * RCTL_FIRST 1605 */ 1606 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1607 } else { 1608 /* 1609 * RCTL_NEXT 1610 */ 1611 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1612 1613 if (tval == NULL) 1614 return (ESRCH); 1615 else if (tval->rcv_next == NULL) 1616 return (ENOENT); 1617 else 1618 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1619 } 1620 1621 return (0); 1622 } 1623 1624 /* 1625 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1626 * 1627 * Overview 1628 * Get the rctl value for the given flags. 1629 * 1630 * Return values 1631 * 0 for successful get, errno otherwise. 1632 */ 1633 int 1634 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1635 struct proc *p) 1636 { 1637 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1638 } 1639 1640 /*ARGSUSED*/ 1641 static int 1642 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1643 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1644 { 1645 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1646 return (ESRCH); 1647 1648 if (rctl->rc_cursor == oval) { 1649 rctl->rc_cursor = oval->rcv_next; 1650 rctl_val_list_reset(rctl->rc_cursor); 1651 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1652 rctl->rc_cursor->rcv_value)); 1653 1654 ASSERT(rctl->rc_cursor != NULL); 1655 } 1656 1657 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1658 1659 return (0); 1660 } 1661 1662 /* 1663 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1664 * 1665 * Overview 1666 * Delete the rctl value for the given flags. 1667 * 1668 * Return values 1669 * 0 for successful delete, errno otherwise. 1670 */ 1671 int 1672 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1673 { 1674 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1675 } 1676 1677 /* 1678 * rctl_local_insert_cb() 1679 * 1680 * Overview 1681 * Insert a new value into the rctl's val list. If an error occurs, 1682 * the val list must be left in the same state as when the function 1683 * was entered. 1684 * 1685 * Return Values 1686 * 0 for successful insert, EINVAL if the value is duplicated in the 1687 * existing list. 1688 */ 1689 /*ARGSUSED*/ 1690 static int 1691 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1692 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1693 { 1694 /* 1695 * Before inserting, confirm there are no duplicates of this value 1696 * and flag level. If there is a duplicate, flag an error and do 1697 * nothing. 1698 */ 1699 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1700 return (EINVAL); 1701 1702 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1703 rctl->rc_cursor = nval; 1704 rctl_val_list_reset(rctl->rc_cursor); 1705 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1706 rctl->rc_cursor->rcv_value)); 1707 1708 ASSERT(rctl->rc_cursor != NULL); 1709 } 1710 1711 return (0); 1712 } 1713 1714 /* 1715 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1716 * 1717 * Overview 1718 * Insert the rctl value into the appropriate rctl set for the calling 1719 * process, given the handle. 1720 */ 1721 int 1722 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1723 { 1724 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1725 } 1726 1727 /* 1728 * rctl_local_insert_all_cb() 1729 * 1730 * Overview 1731 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1732 * 1733 * Inserts new values from the project database (new_values). alloc_values 1734 * should be a linked list of pre-allocated rctl_val_t, which are used to 1735 * populate (rc_projdb). 1736 * 1737 * Should the *new_values linked list match the contents of the rctl's 1738 * rp_projdb then we do nothing. 1739 * 1740 * Return Values 1741 * 0 is always returned. 1742 */ 1743 /*ARGSUSED*/ 1744 static int 1745 rctl_local_insert_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1746 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1747 { 1748 rctl_val_t *val; 1749 rctl_val_t *tmp_val; 1750 rctl_val_t *next; 1751 int modified = 0; 1752 1753 /* 1754 * If this the first time we've set this project rctl, then we delete 1755 * all the privilege values. These privilege values have been set by 1756 * rctl_add_default_limit(). 1757 * 1758 * We save some cycles here by not calling rctl_val_list_delete(). 1759 */ 1760 if (rctl->rc_projdb == NULL) { 1761 val = rctl->rc_values; 1762 1763 while (val != NULL) { 1764 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1765 if (val->rcv_prev != NULL) 1766 val->rcv_prev->rcv_next = val->rcv_next; 1767 else 1768 rctl->rc_values = val->rcv_next; 1769 1770 if (val->rcv_next != NULL) 1771 val->rcv_next->rcv_prev = val->rcv_prev; 1772 1773 tmp_val = val; 1774 val = val->rcv_next; 1775 kmem_cache_free(rctl_val_cache, tmp_val); 1776 } else { 1777 val = val->rcv_next; 1778 } 1779 } 1780 modified = 1; 1781 } 1782 1783 /* 1784 * Delete active values previously set through the project database. 1785 */ 1786 val = rctl->rc_projdb; 1787 1788 while (val != NULL) { 1789 1790 /* Is the old value found in the new values? */ 1791 if (rctl_val_list_find(&new_values, val) == NULL) { 1792 1793 /* 1794 * Delete from the active values if it originated from 1795 * the project database. 1796 */ 1797 if (((tmp_val = rctl_val_list_find(&rctl->rc_values, 1798 val)) != NULL) && 1799 (tmp_val->rcv_flagaction & RCTL_LOCAL_PROJDB)) { 1800 (void) rctl_val_list_delete(&rctl->rc_values, 1801 tmp_val); 1802 } 1803 1804 tmp_val = val->rcv_next; 1805 (void) rctl_val_list_delete(&rctl->rc_projdb, val); 1806 val = tmp_val; 1807 modified = 1; 1808 1809 } else 1810 val = val->rcv_next; 1811 } 1812 1813 /* 1814 * Insert new values from the project database. 1815 */ 1816 while (new_values != NULL) { 1817 next = new_values->rcv_next; 1818 1819 /* 1820 * Insert this new value into the rc_projdb, and duplicate this 1821 * entry to the active list. 1822 */ 1823 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1824 1825 tmp_val = alloc_values->rcv_next; 1826 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1827 alloc_values->rcv_next = tmp_val; 1828 1829 if (rctl_val_list_insert(&rctl->rc_values, 1830 alloc_values) == 0) { 1831 /* inserted move alloc_values on */ 1832 alloc_values = tmp_val; 1833 modified = 1; 1834 } 1835 } else { 1836 /* 1837 * Unlike setrctl() we don't want to return an error on 1838 * a duplicate entry; we are concerned solely with 1839 * ensuring that all the values specified are set. 1840 */ 1841 kmem_cache_free(rctl_val_cache, new_values); 1842 } 1843 new_values = next; 1844 } 1845 1846 /* Teardown any unused rctl_val_t */ 1847 while (alloc_values != NULL) { 1848 tmp_val = alloc_values; 1849 alloc_values = alloc_values->rcv_next; 1850 kmem_cache_free(rctl_val_cache, tmp_val); 1851 } 1852 1853 /* Reset the cursor if rctl values have been modified */ 1854 if (modified) { 1855 rctl->rc_cursor = rctl->rc_values; 1856 rctl_val_list_reset(rctl->rc_cursor); 1857 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1858 rctl->rc_cursor->rcv_value)); 1859 } 1860 1861 return (0); 1862 } 1863 1864 int 1865 rctl_local_insert_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1866 rctl_val_t *alloc_values, struct proc *p) 1867 { 1868 return (rctl_local_op(hndl, new_values, alloc_values, 1869 rctl_local_insert_all_cb, p)); 1870 } 1871 1872 /* 1873 * rctl_local_replace_all_cb() 1874 * 1875 * Overview 1876 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1877 * 1878 * Clears the active rctl values (rc_values), and stored values from the 1879 * previous insertions from the project database (rc_projdb). 1880 * 1881 * Inserts new values from the project database (new_values). alloc_values 1882 * should be a linked list of pre-allocated rctl_val_t, which are used to 1883 * populate (rc_projdb). 1884 * 1885 * Return Values 1886 * 0 is always returned. 1887 */ 1888 /*ARGSUSED*/ 1889 static int 1890 rctl_local_replace_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1891 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1892 { 1893 rctl_val_t *val; 1894 rctl_val_t *next; 1895 rctl_val_t *tmp_val; 1896 1897 /* Delete all the privilege vaules */ 1898 val = rctl->rc_values; 1899 1900 while (val != NULL) { 1901 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1902 if (val->rcv_prev != NULL) 1903 val->rcv_prev->rcv_next = val->rcv_next; 1904 else 1905 rctl->rc_values = val->rcv_next; 1906 1907 if (val->rcv_next != NULL) 1908 val->rcv_next->rcv_prev = val->rcv_prev; 1909 1910 tmp_val = val; 1911 val = val->rcv_next; 1912 kmem_cache_free(rctl_val_cache, tmp_val); 1913 } else { 1914 val = val->rcv_next; 1915 } 1916 } 1917 1918 /* Delete the contents of rc_projdb */ 1919 val = rctl->rc_projdb; 1920 while (val != NULL) { 1921 1922 tmp_val = val; 1923 val = val->rcv_next; 1924 kmem_cache_free(rctl_val_cache, tmp_val); 1925 } 1926 rctl->rc_projdb = NULL; 1927 1928 /* 1929 * Insert new values from the project database. 1930 */ 1931 while (new_values != NULL) { 1932 next = new_values->rcv_next; 1933 1934 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1935 tmp_val = alloc_values->rcv_next; 1936 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1937 alloc_values->rcv_next = tmp_val; 1938 1939 if (rctl_val_list_insert(&rctl->rc_values, 1940 alloc_values) == 0) { 1941 /* inserted, so move alloc_values on */ 1942 alloc_values = tmp_val; 1943 } 1944 } else { 1945 /* 1946 * Unlike setrctl() we don't want to return an error on 1947 * a duplicate entry; we are concerned solely with 1948 * ensuring that all the values specified are set. 1949 */ 1950 kmem_cache_free(rctl_val_cache, new_values); 1951 } 1952 1953 new_values = next; 1954 } 1955 1956 /* Teardown any unused rctl_val_t */ 1957 while (alloc_values != NULL) { 1958 tmp_val = alloc_values; 1959 alloc_values = alloc_values->rcv_next; 1960 kmem_cache_free(rctl_val_cache, tmp_val); 1961 } 1962 1963 /* Always reset the cursor */ 1964 rctl->rc_cursor = rctl->rc_values; 1965 rctl_val_list_reset(rctl->rc_cursor); 1966 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1967 rctl->rc_cursor->rcv_value)); 1968 1969 return (0); 1970 } 1971 1972 int 1973 rctl_local_replace_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1974 rctl_val_t *alloc_values, struct proc *p) 1975 { 1976 return (rctl_local_op(hndl, new_values, alloc_values, 1977 rctl_local_replace_all_cb, p)); 1978 } 1979 1980 static int 1981 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1982 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1983 { 1984 int ret; 1985 rctl_val_t *tmp; 1986 1987 /* Verify that old will be delete-able */ 1988 tmp = rctl_val_list_find(&rctl->rc_values, oval); 1989 if (tmp == NULL) 1990 return (ESRCH); 1991 /* 1992 * Caller should verify that value being deleted is not the 1993 * system value. 1994 */ 1995 ASSERT(tmp->rcv_privilege != RCPRIV_SYSTEM); 1996 1997 /* 1998 * rctl_local_insert_cb() does the job of flagging an error 1999 * for any duplicate values. So, call rctl_local_insert_cb() 2000 * for the new value first, then do deletion of the old value. 2001 * Since this is a callback function to rctl_local_op, we can 2002 * count on rcs_lock being held at this point. This guarantees 2003 * that there is at no point a visible list which contains both 2004 * new and old values. 2005 */ 2006 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 2007 return (ret); 2008 2009 ret = rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval); 2010 ASSERT(ret == 0); 2011 return (0); 2012 } 2013 2014 /* 2015 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 2016 * 2017 * Overview 2018 * Replace the rctl value with a new one. 2019 * 2020 * Return values 2021 * 0 for successful replace, errno otherwise. 2022 */ 2023 int 2024 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 2025 struct proc *p) 2026 { 2027 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 2028 } 2029 2030 /* 2031 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 2032 * 2033 * Overview 2034 * To support rlimit compatibility, we need a function which takes a 64-bit 2035 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2036 * This operation is only intended for legacy rlimits. 2037 */ 2038 int 2039 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 2040 { 2041 rctl_t *rctl; 2042 rctl_val_t *rval; 2043 rctl_set_t *rset = p->p_rctls; 2044 int soft_limit_seen = 0; 2045 int test_for_deny = 1; 2046 2047 mutex_enter(&rset->rcs_lock); 2048 if (rctl_set_find(rset, rc, &rctl) == -1) { 2049 mutex_exit(&rset->rcs_lock); 2050 return (-1); 2051 } 2052 2053 rval = rctl->rc_values; 2054 2055 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 2056 RCTL_GLOBAL_DENY_ALWAYS)) 2057 test_for_deny = 0; 2058 2059 /* 2060 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 2061 */ 2062 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 2063 if (test_for_deny && 2064 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 2065 rval = rval->rcv_next; 2066 continue; 2067 } 2068 2069 /* 2070 * 2. If this is an RCPRIV_BASIC value, then we've found the 2071 * effective soft limit and should set rlim_cur. We should then 2072 * continue looking for another control value with the DENY bit 2073 * set. 2074 */ 2075 if (rval->rcv_privilege == RCPRIV_BASIC) { 2076 if (soft_limit_seen) { 2077 rval = rval->rcv_next; 2078 continue; 2079 } 2080 2081 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2082 rval->rcv_value < rctl_model_maximum( 2083 rctl->rc_dict_entry, p)) 2084 rlp64->rlim_cur = rval->rcv_value; 2085 else 2086 rlp64->rlim_cur = RLIM64_INFINITY; 2087 soft_limit_seen = 1; 2088 2089 rval = rval->rcv_next; 2090 continue; 2091 } 2092 2093 /* 2094 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 2095 * a soft limit candidate, then we've found the effective hard 2096 * and soft limits and should set both If we had found a soft 2097 * limit, then this is only the hard limit and we need only set 2098 * rlim_max. 2099 */ 2100 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2101 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 2102 p)) 2103 rlp64->rlim_max = rval->rcv_value; 2104 else 2105 rlp64->rlim_max = RLIM64_INFINITY; 2106 if (!soft_limit_seen) 2107 rlp64->rlim_cur = rlp64->rlim_max; 2108 2109 mutex_exit(&rset->rcs_lock); 2110 return (0); 2111 } 2112 2113 if (rval == NULL) { 2114 /* 2115 * This control sequence is corrupt, as it is not terminated by 2116 * a system privileged control value. 2117 */ 2118 mutex_exit(&rset->rcs_lock); 2119 return (-1); 2120 } 2121 2122 /* 2123 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 2124 * the soft, if we haven't a soft candidate) should be the value of the 2125 * system control value. 2126 */ 2127 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2128 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 2129 rlp64->rlim_max = rval->rcv_value; 2130 else 2131 rlp64->rlim_max = RLIM64_INFINITY; 2132 2133 if (!soft_limit_seen) 2134 rlp64->rlim_cur = rlp64->rlim_max; 2135 2136 mutex_exit(&rset->rcs_lock); 2137 return (0); 2138 } 2139 2140 /* 2141 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 2142 * 2143 * Overview 2144 * Before making a series of calls to rctl_rlimit_set(), we must have a 2145 * preallocated batch of resource control values, as rctl_rlimit_set() can 2146 * potentially consume two resource control values per call. 2147 * 2148 * Return values 2149 * A populated resource control allocation group with 2n resource control 2150 * values. 2151 * 2152 * Caller's context 2153 * Must be safe for KM_SLEEP allocations. 2154 */ 2155 rctl_alloc_gp_t * 2156 rctl_rlimit_set_prealloc(uint_t n) 2157 { 2158 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 2159 2160 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 2161 2162 gp->rcag_nvals = 2 * n; 2163 2164 rctl_gp_alloc(gp); 2165 2166 return (gp); 2167 } 2168 2169 /* 2170 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 2171 * int) 2172 * 2173 * Overview 2174 * To support rlimit compatibility, we need a function which takes a 64-bit 2175 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2176 * This operation is only intended for legacy rlimits. 2177 * 2178 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 2179 * minimize the number of values placed on the value sequence in various 2180 * cases. Furthermore, we don't allow multiple identical privilege-action 2181 * values on the same sequence. (That is, we don't want a sequence like 2182 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 2183 * memory.) So we want to delete any values with the same privilege value and 2184 * action. 2185 * 2186 * Return values 2187 * 0 for successful set, errno otherwise. Errno will be either EINVAL 2188 * or EPERM, in keeping with defined errnos for ulimit() and setrlimit() 2189 * system calls. 2190 */ 2191 /*ARGSUSED*/ 2192 int 2193 rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64, 2194 rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr) 2195 { 2196 rctl_t *rctl; 2197 rctl_val_t *rval, *rval_priv, *rval_basic; 2198 rctl_set_t *rset = p->p_rctls; 2199 rctl_qty_t max; 2200 rctl_entity_p_t e; 2201 struct rlimit64 cur_rl; 2202 2203 e.rcep_t = RCENTITY_PROCESS; 2204 e.rcep_p.proc = p; 2205 2206 if (rlp64->rlim_cur > rlp64->rlim_max) 2207 return (EINVAL); 2208 2209 if (rctl_rlimit_get(rc, p, &cur_rl) == -1) 2210 return (EINVAL); 2211 2212 /* 2213 * If we are not privileged, we can only lower the hard limit. 2214 */ 2215 if ((rlp64->rlim_max > cur_rl.rlim_max) && 2216 cur_rl.rlim_max != RLIM64_INFINITY && 2217 secpolicy_resource(cr) != 0) 2218 return (EPERM); 2219 2220 mutex_enter(&rset->rcs_lock); 2221 2222 if (rctl_set_find(rset, rc, &rctl) == -1) { 2223 mutex_exit(&rset->rcs_lock); 2224 return (EINVAL); 2225 } 2226 2227 rval_priv = rctl_gp_detach_val(ragp); 2228 2229 rval = rctl->rc_values; 2230 2231 while (rval != NULL) { 2232 rctl_val_t *next = rval->rcv_next; 2233 2234 if (rval->rcv_privilege == RCPRIV_SYSTEM) 2235 break; 2236 2237 if ((rval->rcv_privilege == RCPRIV_BASIC) || 2238 (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) == 2239 (flagaction & ~RCTL_LOCAL_ACTION_MASK)) { 2240 if (rctl->rc_cursor == rval) { 2241 rctl->rc_cursor = rval->rcv_next; 2242 rctl_val_list_reset(rctl->rc_cursor); 2243 RCTLOP_SET(rctl, p, &e, rctl_model_value( 2244 rctl->rc_dict_entry, p, 2245 rctl->rc_cursor->rcv_value)); 2246 } 2247 (void) rctl_val_list_delete(&rctl->rc_values, rval); 2248 } 2249 2250 rval = next; 2251 } 2252 2253 rval_priv->rcv_privilege = RCPRIV_PRIVILEGED; 2254 rval_priv->rcv_flagaction = flagaction; 2255 if (rlp64->rlim_max == RLIM64_INFINITY) { 2256 rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL; 2257 max = rctl->rc_dict_entry->rcd_max_native; 2258 } else { 2259 max = rlp64->rlim_max; 2260 } 2261 rval_priv->rcv_value = max; 2262 rval_priv->rcv_action_signal = signal; 2263 rval_priv->rcv_action_recipient = NULL; 2264 rval_priv->rcv_action_recip_pid = -1; 2265 rval_priv->rcv_firing_time = 0; 2266 rval_priv->rcv_prev = rval_priv->rcv_next = NULL; 2267 2268 (void) rctl_val_list_insert(&rctl->rc_values, rval_priv); 2269 rctl->rc_cursor = rval_priv; 2270 rctl_val_list_reset(rctl->rc_cursor); 2271 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2272 rctl->rc_cursor->rcv_value)); 2273 2274 if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) { 2275 rval_basic = rctl_gp_detach_val(ragp); 2276 2277 rval_basic->rcv_privilege = RCPRIV_BASIC; 2278 rval_basic->rcv_value = rlp64->rlim_cur; 2279 rval_basic->rcv_flagaction = flagaction; 2280 rval_basic->rcv_action_signal = signal; 2281 rval_basic->rcv_action_recipient = p; 2282 rval_basic->rcv_action_recip_pid = p->p_pid; 2283 rval_basic->rcv_firing_time = 0; 2284 rval_basic->rcv_prev = rval_basic->rcv_next = NULL; 2285 2286 (void) rctl_val_list_insert(&rctl->rc_values, rval_basic); 2287 rctl->rc_cursor = rval_basic; 2288 rctl_val_list_reset(rctl->rc_cursor); 2289 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2290 rctl->rc_cursor->rcv_value)); 2291 } 2292 2293 ASSERT(rctl->rc_cursor != NULL); 2294 2295 mutex_exit(&rset->rcs_lock); 2296 return (0); 2297 } 2298 2299 2300 /* 2301 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t, 2302 * rlim64_t, rctl_ops_t *) 2303 * 2304 * Overview 2305 * rctl_register() performs a look-up in the dictionary of rctls 2306 * active on the system; if a rctl of that name is absent, an entry is 2307 * made into the dictionary. The rctl is returned with its reference 2308 * count incremented by one. If the rctl name already exists, we panic. 2309 * (Were the resource control system to support dynamic loading and unloading, 2310 * which it is structured for, duplicate registration should lead to load 2311 * failure instead of panicking.) 2312 * 2313 * Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be 2314 * defined. This limit contains the highest possible value for this quantity 2315 * on the system. Furthermore, the registered control must provide infinite 2316 * values for all applicable address space models supported by the operating 2317 * system. Attempts to set resource control values beyond the system limit 2318 * will fail. 2319 * 2320 * Return values 2321 * The rctl's ID. 2322 * 2323 * Caller's context 2324 * Caller must be in a context suitable for KM_SLEEP allocations. 2325 */ 2326 rctl_hndl_t 2327 rctl_register( 2328 const char *name, 2329 rctl_entity_t entity, 2330 int global_flags, 2331 rlim64_t max_native, 2332 rlim64_t max_ilp32, 2333 rctl_ops_t *ops) 2334 { 2335 rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 2336 rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 2337 rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t), 2338 KM_SLEEP); 2339 rctl_t *old_rctl; 2340 rctl_hndl_t rhndl; 2341 int localflags; 2342 2343 ASSERT(ops != NULL); 2344 2345 bzero(rctl, sizeof (rctl_t)); 2346 bzero(rctl_val, sizeof (rctl_val_t)); 2347 2348 if (global_flags & RCTL_GLOBAL_DENY_NEVER) 2349 localflags = RCTL_LOCAL_MAXIMAL; 2350 else 2351 localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY; 2352 2353 rctl_val->rcv_privilege = RCPRIV_SYSTEM; 2354 rctl_val->rcv_value = max_native; 2355 rctl_val->rcv_flagaction = localflags; 2356 rctl_val->rcv_action_signal = 0; 2357 rctl_val->rcv_action_recipient = NULL; 2358 rctl_val->rcv_action_recip_pid = -1; 2359 rctl_val->rcv_firing_time = 0; 2360 rctl_val->rcv_next = NULL; 2361 rctl_val->rcv_prev = NULL; 2362 2363 rctl_de->rcd_name = (char *)name; 2364 rctl_de->rcd_default_value = rctl_val; 2365 rctl_de->rcd_max_native = max_native; 2366 rctl_de->rcd_max_ilp32 = max_ilp32; 2367 rctl_de->rcd_entity = entity; 2368 rctl_de->rcd_ops = ops; 2369 rctl_de->rcd_flagaction = global_flags; 2370 2371 rctl->rc_dict_entry = rctl_de; 2372 rctl->rc_values = rctl_val; 2373 2374 /* 2375 * 1. Take global lock, validate nonexistence of name, get ID. 2376 */ 2377 mutex_enter(&rctl_dict_lock); 2378 2379 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 2380 (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND) 2381 panic("duplicate registration of rctl %s", name); 2382 2383 rhndl = rctl_de->rcd_id = rctl->rc_id = 2384 (rctl_hndl_t)id_alloc(rctl_ids); 2385 2386 /* 2387 * 2. Insert name-entry pair in rctl_dict_by_name. 2388 */ 2389 if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name, 2390 (mod_hash_val_t)rctl_de)) 2391 panic("unable to insert rctl dict entry for %s (%u)", name, 2392 (uint_t)rctl->rc_id); 2393 2394 /* 2395 * 3. Insert ID-rctl_t * pair in rctl_dict. 2396 */ 2397 if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2398 (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND) 2399 panic("duplicate rctl ID %u registered", rctl->rc_id); 2400 2401 if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2402 (mod_hash_val_t)rctl)) 2403 panic("unable to insert rctl %s/%u (%p)", name, 2404 (uint_t)rctl->rc_id, rctl); 2405 2406 /* 2407 * 3a. Insert rctl_dict_entry_t * in appropriate entity list. 2408 */ 2409 2410 mutex_enter(&rctl_lists_lock); 2411 2412 switch (entity) { 2413 case RCENTITY_ZONE: 2414 case RCENTITY_PROJECT: 2415 case RCENTITY_TASK: 2416 case RCENTITY_PROCESS: 2417 rctl_de->rcd_next = rctl_lists[entity]; 2418 rctl_lists[entity] = rctl_de; 2419 break; 2420 default: 2421 panic("registering unknown rctl entity %d (%s)", entity, 2422 name); 2423 break; 2424 } 2425 2426 mutex_exit(&rctl_lists_lock); 2427 2428 /* 2429 * 4. Drop lock. 2430 */ 2431 mutex_exit(&rctl_dict_lock); 2432 2433 return (rhndl); 2434 } 2435 2436 /* 2437 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, 2438 * rctl_val_t *v) 2439 * 2440 * Overview 2441 * rctl_global_action() takes, in according with the flags on the rctl_dict 2442 * entry for the given control, the appropriate actions on the exceeded 2443 * control value. Additionally, rctl_global_action() updates the firing time 2444 * on the exceeded value. 2445 * 2446 * Return values 2447 * A bitmask reflecting the actions actually taken. 2448 * 2449 * Caller's context 2450 * No restrictions on context. 2451 */ 2452 /*ARGSUSED*/ 2453 static int 2454 rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v) 2455 { 2456 rctl_dict_entry_t *rde = r->rc_dict_entry; 2457 const char *pr, *en, *idstr; 2458 id_t id; 2459 enum { 2460 SUFFIX_NONE, /* id consumed directly */ 2461 SUFFIX_NUMERIC, /* id consumed in suffix */ 2462 SUFFIX_STRING /* idstr consumed in suffix */ 2463 } suffix = SUFFIX_NONE; 2464 int ret = 0; 2465 2466 v->rcv_firing_time = gethrtime(); 2467 2468 switch (v->rcv_privilege) { 2469 case RCPRIV_BASIC: 2470 pr = "basic"; 2471 break; 2472 case RCPRIV_PRIVILEGED: 2473 pr = "privileged"; 2474 break; 2475 case RCPRIV_SYSTEM: 2476 pr = "system"; 2477 break; 2478 default: 2479 pr = "unknown"; 2480 break; 2481 } 2482 2483 switch (rde->rcd_entity) { 2484 case RCENTITY_PROCESS: 2485 en = "process"; 2486 id = p->p_pid; 2487 suffix = SUFFIX_NONE; 2488 break; 2489 case RCENTITY_TASK: 2490 en = "task"; 2491 id = p->p_task->tk_tkid; 2492 suffix = SUFFIX_NUMERIC; 2493 break; 2494 case RCENTITY_PROJECT: 2495 en = "project"; 2496 id = p->p_task->tk_proj->kpj_id; 2497 suffix = SUFFIX_NUMERIC; 2498 break; 2499 case RCENTITY_ZONE: 2500 en = "zone"; 2501 idstr = p->p_zone->zone_name; 2502 suffix = SUFFIX_STRING; 2503 break; 2504 default: 2505 en = "unknown entity associated with process"; 2506 id = p->p_pid; 2507 suffix = SUFFIX_NONE; 2508 break; 2509 } 2510 2511 if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) { 2512 switch (suffix) { 2513 default: 2514 case SUFFIX_NONE: 2515 (void) strlog(0, 0, 0, 2516 rde->rcd_strlog_flags | log_global.lz_active, 2517 "%s rctl %s (value %llu) exceeded by %s %d.", 2518 pr, rde->rcd_name, v->rcv_value, en, id); 2519 break; 2520 case SUFFIX_NUMERIC: 2521 (void) strlog(0, 0, 0, 2522 rde->rcd_strlog_flags | log_global.lz_active, 2523 "%s rctl %s (value %llu) exceeded by process %d" 2524 " in %s %d.", 2525 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2526 en, id); 2527 break; 2528 case SUFFIX_STRING: 2529 (void) strlog(0, 0, 0, 2530 rde->rcd_strlog_flags | log_global.lz_active, 2531 "%s rctl %s (value %llu) exceeded by process %d" 2532 " in %s %s.", 2533 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2534 en, idstr); 2535 break; 2536 } 2537 } 2538 2539 if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) 2540 ret |= RCT_DENY; 2541 2542 return (ret); 2543 } 2544 2545 static int 2546 rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v, 2547 uint_t safety) 2548 { 2549 int ret = 0; 2550 sigqueue_t *sqp = NULL; 2551 rctl_dict_entry_t *rde = r->rc_dict_entry; 2552 int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE); 2553 2554 proc_t *recipient = v->rcv_action_recipient; 2555 id_t recip_pid = v->rcv_action_recip_pid; 2556 int recip_signal = v->rcv_action_signal; 2557 uint_t flagaction = v->rcv_flagaction; 2558 2559 if (safety == RCA_UNSAFE_ALL) { 2560 if (flagaction & RCTL_LOCAL_DENY) { 2561 ret |= RCT_DENY; 2562 } 2563 return (ret); 2564 } 2565 2566 if (flagaction & RCTL_LOCAL_SIGNAL) { 2567 /* 2568 * We can build a siginfo only in the case that it is 2569 * safe for us to drop p_lock. (For asynchronous 2570 * checks this is currently not true.) 2571 */ 2572 if (safety == RCA_SAFE) { 2573 mutex_exit(&rset->rcs_lock); 2574 mutex_exit(&p->p_lock); 2575 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 2576 mutex_enter(&p->p_lock); 2577 mutex_enter(&rset->rcs_lock); 2578 2579 sqp->sq_info.si_signo = recip_signal; 2580 sqp->sq_info.si_code = SI_RCTL; 2581 sqp->sq_info.si_errno = 0; 2582 sqp->sq_info.si_entity = (int)rde->rcd_entity; 2583 } 2584 2585 if (recipient == NULL || recipient == p) { 2586 ret |= RCT_SIGNAL; 2587 2588 if (sqp == NULL) { 2589 sigtoproc(p, NULL, recip_signal); 2590 } else if (p == curproc) { 2591 /* 2592 * Then this is a synchronous test and we can 2593 * direct the signal at the violating thread. 2594 */ 2595 sigaddqa(curproc, curthread, sqp); 2596 } else { 2597 sigaddqa(p, NULL, sqp); 2598 } 2599 } else if (!unobservable) { 2600 proc_t *rp; 2601 2602 mutex_exit(&rset->rcs_lock); 2603 mutex_exit(&p->p_lock); 2604 2605 mutex_enter(&pidlock); 2606 if ((rp = prfind(recip_pid)) == recipient) { 2607 /* 2608 * Recipient process is still alive, but may not 2609 * be in this task or project any longer. In 2610 * this case, the recipient's resource control 2611 * set pertinent to this control will have 2612 * changed--and we will not deliver the signal, 2613 * as the recipient process is trying to tear 2614 * itself off of its former set. 2615 */ 2616 mutex_enter(&rp->p_lock); 2617 mutex_exit(&pidlock); 2618 2619 if (rctl_entity_obtain_rset(rde, rp) == rset) { 2620 ret |= RCT_SIGNAL; 2621 2622 if (sqp == NULL) 2623 sigtoproc(rp, NULL, 2624 recip_signal); 2625 else 2626 sigaddqa(rp, NULL, sqp); 2627 } else if (sqp) { 2628 kmem_free(sqp, sizeof (sigqueue_t)); 2629 } 2630 mutex_exit(&rp->p_lock); 2631 } else { 2632 mutex_exit(&pidlock); 2633 if (sqp) 2634 kmem_free(sqp, sizeof (sigqueue_t)); 2635 } 2636 2637 mutex_enter(&p->p_lock); 2638 /* 2639 * Since we dropped p_lock, we may no longer be in the 2640 * same task or project as we were at entry. It is thus 2641 * unsafe for us to reacquire the set lock at this 2642 * point; callers of rctl_local_action() must handle 2643 * this possibility. 2644 */ 2645 ret |= RCT_LK_ABANDONED; 2646 } else if (sqp) { 2647 kmem_free(sqp, sizeof (sigqueue_t)); 2648 } 2649 } 2650 2651 if ((flagaction & RCTL_LOCAL_DENY) && 2652 (recipient == NULL || recipient == p)) { 2653 ret |= RCT_DENY; 2654 } 2655 2656 return (ret); 2657 } 2658 2659 /* 2660 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t) 2661 * 2662 * Overview 2663 * Take the action associated with the enforced value (as defined by 2664 * rctl_get_enforced_value()) being exceeded or encountered. Possibly perform 2665 * a restricted subset of the available actions, if circumstances dictate that 2666 * we cannot safely allocate memory (for a sigqueue_t) or guarantee process 2667 * persistence across the duration of the function (an asynchronous action). 2668 * 2669 * Return values 2670 * Actions taken, according to the rctl_test bitmask. 2671 * 2672 * Caller's context 2673 * Safe to acquire rcs_lock. 2674 */ 2675 int 2676 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety) 2677 { 2678 return (rctl_action_entity(hndl, rset, p, NULL, safety)); 2679 } 2680 2681 int 2682 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, 2683 rctl_entity_p_t *e, uint_t safety) 2684 { 2685 int ret = RCT_NONE; 2686 rctl_t *lrctl; 2687 rctl_entity_p_t e_tmp; 2688 2689 rctl_action_acquire: 2690 mutex_enter(&rset->rcs_lock); 2691 if (rctl_set_find(rset, hndl, &lrctl) == -1) { 2692 mutex_exit(&rset->rcs_lock); 2693 return (ret); 2694 } 2695 2696 if (e == NULL) { 2697 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2698 p, &e_tmp); 2699 e = &e_tmp; 2700 } 2701 2702 if ((ret & RCT_LK_ABANDONED) == 0) { 2703 ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor); 2704 2705 RCTLOP_ACTION(lrctl, p, e); 2706 2707 ret |= rctl_local_action(lrctl, rset, p, 2708 lrctl->rc_cursor, safety); 2709 2710 if (ret & RCT_LK_ABANDONED) 2711 goto rctl_action_acquire; 2712 } 2713 2714 ret &= ~RCT_LK_ABANDONED; 2715 2716 if (!(ret & RCT_DENY) && 2717 lrctl->rc_cursor->rcv_next != NULL) { 2718 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2719 2720 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2721 p, lrctl->rc_cursor->rcv_value)); 2722 2723 } 2724 mutex_exit(&rset->rcs_lock); 2725 2726 return (ret); 2727 } 2728 2729 /* 2730 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t) 2731 * 2732 * Overview 2733 * Increment the resource associated with the given handle, returning zero if 2734 * the incremented value does not exceed the threshold for the current limit 2735 * on the resource. 2736 * 2737 * Return values 2738 * Actions taken, according to the rctl_test bitmask. 2739 * 2740 * Caller's context 2741 * p_lock held by caller. 2742 */ 2743 /*ARGSUSED*/ 2744 int 2745 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2746 rctl_qty_t incr, uint_t flags) 2747 { 2748 return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags)); 2749 } 2750 2751 int 2752 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2753 rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags) 2754 { 2755 rctl_t *lrctl; 2756 int ret = RCT_NONE; 2757 rctl_entity_p_t e_tmp; 2758 if (p == &p0) { 2759 /* 2760 * We don't enforce rctls on the kernel itself. 2761 */ 2762 return (ret); 2763 } 2764 2765 rctl_test_acquire: 2766 ASSERT(MUTEX_HELD(&p->p_lock)); 2767 2768 mutex_enter(&rset->rcs_lock); 2769 2770 /* 2771 * Dereference from rctl_set. We don't enforce newly loaded controls 2772 * that haven't been set on this entity (since the only valid value is 2773 * the infinite system value). 2774 */ 2775 if (rctl_set_find(rset, rhndl, &lrctl) == -1) { 2776 mutex_exit(&rset->rcs_lock); 2777 return (ret); 2778 } 2779 2780 /* 2781 * This control is currently unenforced: maximal value on control 2782 * supporting infinitely available resource. 2783 */ 2784 if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) && 2785 (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) { 2786 2787 mutex_exit(&rset->rcs_lock); 2788 return (ret); 2789 } 2790 2791 /* 2792 * If we have been called by rctl_test, look up the entity pointer 2793 * from the proc pointer. 2794 */ 2795 if (e == NULL) { 2796 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2797 p, &e_tmp); 2798 e = &e_tmp; 2799 } 2800 2801 /* 2802 * Get enforced rctl value and current usage. Test the increment 2803 * with the current usage against the enforced value--take action as 2804 * necessary. 2805 */ 2806 while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) { 2807 if ((ret & RCT_LK_ABANDONED) == 0) { 2808 ret |= rctl_global_action(lrctl, rset, p, 2809 lrctl->rc_cursor); 2810 2811 RCTLOP_ACTION(lrctl, p, e); 2812 2813 ret |= rctl_local_action(lrctl, rset, p, 2814 lrctl->rc_cursor, flags); 2815 2816 if (ret & RCT_LK_ABANDONED) 2817 goto rctl_test_acquire; 2818 } 2819 2820 ret &= ~RCT_LK_ABANDONED; 2821 2822 if ((ret & RCT_DENY) == RCT_DENY || 2823 lrctl->rc_cursor->rcv_next == NULL) { 2824 ret |= RCT_DENY; 2825 break; 2826 } 2827 2828 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2829 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2830 p, lrctl->rc_cursor->rcv_value)); 2831 } 2832 2833 mutex_exit(&rset->rcs_lock); 2834 2835 return (ret); 2836 } 2837 2838 /* 2839 * void rctl_init(void) 2840 * 2841 * Overview 2842 * Initialize the rctl subsystem, including the primoridal rctls 2843 * provided by the system. New subsystem-specific rctls should _not_ be 2844 * initialized here. (Do it in your own file.) 2845 * 2846 * Return values 2847 * None. 2848 * 2849 * Caller's context 2850 * Safe for KM_SLEEP allocations. Must be called prior to any process model 2851 * initialization. 2852 */ 2853 void 2854 rctl_init(void) 2855 { 2856 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2857 0, NULL, NULL, NULL, NULL, NULL, 0); 2858 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2859 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2860 2861 rctl_dict = mod_hash_create_extended("rctl_dict", 2862 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2863 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2864 rctl_dict_by_name = mod_hash_create_strhash( 2865 "rctl_handles_by_name", rctl_dict_size, 2866 mod_hash_null_valdtor); 2867 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2868 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2869 2870 rctlproc_init(); 2871 } 2872 2873 /* 2874 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) 2875 * 2876 * Increments the amount of locked memory on a project, and 2877 * zone. If proj is NULL, the proj and zone of proc_t p is used. If 2878 * chargeproc is non-zero, then the charged amount is cached on p->p_locked_mem 2879 * so that the charge can be migrated when a process changes projects. 2880 * 2881 * Return values 2882 * 0 - success 2883 * EAGAIN - attempting to increment locked memory is denied by one 2884 * or more resource entities. 2885 */ 2886 int 2887 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2888 int chargeproc) 2889 { 2890 kproject_t *projp; 2891 zone_t *zonep; 2892 rctl_entity_p_t e; 2893 int ret = 0; 2894 2895 ASSERT(p != NULL); 2896 ASSERT(MUTEX_HELD(&p->p_lock)); 2897 if (proj != NULL) { 2898 projp = proj; 2899 zonep = zone_find_by_id(projp->kpj_zoneid); 2900 } else { 2901 projp = p->p_task->tk_proj; 2902 zonep = p->p_zone; 2903 } 2904 2905 mutex_enter(&zonep->zone_mem_lock); 2906 2907 e.rcep_p.proj = projp; 2908 e.rcep_t = RCENTITY_PROJECT; 2909 if (projp->kpj_data.kpd_locked_mem + inc > 2910 projp->kpj_data.kpd_locked_mem_ctl) { 2911 if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, 2912 p, &e, inc, 0) & RCT_DENY) { 2913 ret = EAGAIN; 2914 goto out; 2915 } 2916 } 2917 e.rcep_p.zone = zonep; 2918 e.rcep_t = RCENTITY_ZONE; 2919 if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { 2920 if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, 2921 p, &e, inc, 0) & RCT_DENY) { 2922 ret = EAGAIN; 2923 goto out; 2924 } 2925 } 2926 2927 zonep->zone_locked_mem += inc; 2928 projp->kpj_data.kpd_locked_mem += inc; 2929 if (chargeproc != 0) { 2930 p->p_locked_mem += inc; 2931 } 2932 out: 2933 mutex_exit(&zonep->zone_mem_lock); 2934 if (proj != NULL) 2935 zone_rele(zonep); 2936 return (ret); 2937 } 2938 2939 /* 2940 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) 2941 * 2942 * Decrements the amount of locked memory on a project and 2943 * zone. If proj is NULL, the proj and zone of proc_t p is used. If 2944 * creditproc is non-zero, then the quantity of locked memory is subtracted 2945 * from p->p_locked_mem. 2946 * 2947 * Return values 2948 * none 2949 */ 2950 void 2951 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2952 int creditproc) 2953 { 2954 kproject_t *projp; 2955 zone_t *zonep; 2956 2957 if (proj != NULL) { 2958 projp = proj; 2959 zonep = zone_find_by_id(projp->kpj_zoneid); 2960 } else { 2961 ASSERT(p != NULL); 2962 ASSERT(MUTEX_HELD(&p->p_lock)); 2963 projp = p->p_task->tk_proj; 2964 zonep = p->p_zone; 2965 } 2966 2967 mutex_enter(&zonep->zone_mem_lock); 2968 zonep->zone_locked_mem -= inc; 2969 projp->kpj_data.kpd_locked_mem -= inc; 2970 if (creditproc != 0) { 2971 ASSERT(p != NULL); 2972 ASSERT(MUTEX_HELD(&p->p_lock)); 2973 p->p_locked_mem -= inc; 2974 } 2975 mutex_exit(&zonep->zone_mem_lock); 2976 if (proj != NULL) 2977 zone_rele(zonep); 2978 } 2979 2980 /* 2981 * rctl_incr_swap(proc_t *, zone_t *, size_t) 2982 * 2983 * Overview 2984 * Increments the swap charge on the specified zone. 2985 * 2986 * Return values 2987 * 0 on success. EAGAIN if swap increment fails due an rctl value 2988 * on the zone. 2989 * 2990 * Callers context 2991 * p_lock held on specified proc. 2992 * swap must be even multiple of PAGESIZE 2993 */ 2994 int 2995 rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap) 2996 { 2997 rctl_entity_p_t e; 2998 2999 ASSERT(MUTEX_HELD(&proc->p_lock)); 3000 ASSERT((swap & PAGEOFFSET) == 0); 3001 e.rcep_p.zone = zone; 3002 e.rcep_t = RCENTITY_ZONE; 3003 3004 mutex_enter(&zone->zone_mem_lock); 3005 3006 if ((zone->zone_max_swap + swap) > 3007 zone->zone_max_swap_ctl) { 3008 3009 if (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls, 3010 proc, &e, swap, 0) & RCT_DENY) { 3011 mutex_exit(&zone->zone_mem_lock); 3012 return (EAGAIN); 3013 } 3014 } 3015 zone->zone_max_swap += swap; 3016 mutex_exit(&zone->zone_mem_lock); 3017 return (0); 3018 } 3019 3020 /* 3021 * rctl_decr_swap(zone_t *, size_t) 3022 * 3023 * Overview 3024 * Decrements the swap charge on the specified zone. 3025 * 3026 * Return values 3027 * None 3028 * 3029 * Callers context 3030 * swap must be even multiple of PAGESIZE 3031 */ 3032 void 3033 rctl_decr_swap(zone_t *zone, size_t swap) 3034 { 3035 ASSERT((swap & PAGEOFFSET) == 0); 3036 mutex_enter(&zone->zone_mem_lock); 3037 ASSERT(zone->zone_max_swap >= swap); 3038 zone->zone_max_swap -= swap; 3039 mutex_exit(&zone->zone_mem_lock); 3040 } 3041 3042 /* 3043 * Create resource kstat 3044 */ 3045 static kstat_t * 3046 rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class, 3047 uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid) 3048 { 3049 kstat_t *ksp = NULL; 3050 char name[KSTAT_STRLEN]; 3051 3052 (void) snprintf(name, KSTAT_STRLEN, "%s_%d", ks_name, ks_instance); 3053 3054 if ((ksp = kstat_create_zone("caps", ks_zoneid, 3055 name, ks_class, ks_type, 3056 ks_ndata, ks_flags, ks_zoneid)) != NULL) { 3057 if (ks_zoneid != GLOBAL_ZONEID) 3058 kstat_zone_add(ksp, GLOBAL_ZONEID); 3059 } 3060 return (ksp); 3061 } 3062 3063 /* 3064 * Create zone-specific resource kstat 3065 */ 3066 kstat_t * 3067 rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type, 3068 uint_t ks_ndata, uchar_t ks_flags) 3069 { 3070 char name[KSTAT_STRLEN]; 3071 3072 (void) snprintf(name, KSTAT_STRLEN, "%s_zone", ks_name); 3073 3074 return (rctl_kstat_create_common(name, zone->zone_id, "zone_caps", 3075 ks_type, ks_ndata, ks_flags, zone->zone_id)); 3076 } 3077 3078 /* 3079 * Create project-specific resource kstat 3080 */ 3081 kstat_t * 3082 rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type, 3083 uint_t ks_ndata, uchar_t ks_flags) 3084 { 3085 char name[KSTAT_STRLEN]; 3086 3087 (void) snprintf(name, KSTAT_STRLEN, "%s_project", ks_name); 3088 3089 return (rctl_kstat_create_common(name, kpj->kpj_id, "project_caps", 3090 ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid)); 3091 } 3092