1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/atomic.h> 27 #include <sys/cmn_err.h> 28 #include <sys/id_space.h> 29 #include <sys/kmem.h> 30 #include <sys/kstat.h> 31 #include <sys/log.h> 32 #include <sys/modctl.h> 33 #include <sys/modhash.h> 34 #include <sys/mutex.h> 35 #include <sys/proc.h> 36 #include <sys/procset.h> 37 #include <sys/project.h> 38 #include <sys/resource.h> 39 #include <sys/rctl.h> 40 #include <sys/siginfo.h> 41 #include <sys/strlog.h> 42 #include <sys/systm.h> 43 #include <sys/task.h> 44 #include <sys/types.h> 45 #include <sys/policy.h> 46 #include <sys/zone.h> 47 48 /* 49 * Resource controls (rctls) 50 * 51 * The rctl subsystem provides a mechanism for kernel components to 52 * register their individual resource controls with the system as a whole, 53 * such that those controls can subscribe to specific actions while being 54 * associated with the various process-model entities provided by the kernel: 55 * the process, the task, the project, and the zone. (In principle, only 56 * minor modifications would be required to connect the resource control 57 * functionality to non-process-model entities associated with the system.) 58 * 59 * Subsystems register their rctls via rctl_register(). Subsystems 60 * also wishing to provide additional limits on a given rctl can modify 61 * them once they have the rctl handle. Each subsystem should store the 62 * handle to their rctl for direct access. 63 * 64 * A primary dictionary, rctl_dict, contains a hash of id to the default 65 * control definition for each controlled resource-entity pair on the system. 66 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 67 * resource control handles. The resource control handles are distributed by 68 * the rctl_ids ID space. The handles are private and not to be 69 * advertised to userland; all userland interactions are via the rctl 70 * names. 71 * 72 * Entities inherit their rctls from their predecessor. Since projects have 73 * no ancestor, they inherit their rctls from the rctl dict for project 74 * rctls. It is expected that project controls will be set to their 75 * appropriate values shortly after project creation, presumably from a 76 * policy source such as the project database. 77 * 78 * Data structures 79 * The rctl_set_t attached to each of the process model entities is a simple 80 * hash table keyed on the rctl handle assigned at registration. The entries 81 * in the hash table are rctl_t's, whose relationship with the active control 82 * values on that resource and with the global state of the resource we 83 * illustrate below: 84 * 85 * rctl_dict[key] --> rctl_dict_entry 86 * ^ 87 * | 88 * +--+---+ 89 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL 90 * +--+---+ ^ 91 * | | 92 * +------- cursor ------+ 93 * 94 * That is, the rctl contains a back pointer to the global resource control 95 * state for this resource, which is also available in the rctl_dict hash 96 * table mentioned earlier. The rctl contains two pointers to resource 97 * control values: one, values, indicates the entire sequence of control 98 * values; the other, cursor, indicates the currently active control 99 * value--the next value to be enforced. The value list itself is an open, 100 * doubly-linked list, the last non-NULL member of which is the system value 101 * for that resource (being the theoretical/conventional maximum allowable 102 * value for the resource on this OS instance). 103 * 104 * Ops Vector 105 * Subsystems publishing rctls need not provide instances of all of the 106 * functions specified by the ops vector. In particular, if general 107 * rctl_*() entry points are not being called, certain functions can be 108 * omitted. These align as follows: 109 * 110 * rctl_set() 111 * You may wish to provide a set callback if locking circumstances prevent 112 * it or if the performance cost of requesting the enforced value from the 113 * resource control is prohibitively expensive. For instance, the currently 114 * enforced file size limit is stored on the process in the p_fsz_ctl to 115 * maintain read()/write() performance. 116 * 117 * rctl_test() 118 * You must provide a test callback if you are using the rctl_test() 119 * interface. An action callback is optional. 120 * 121 * rctl_action() 122 * You may wish to provide an action callback. 123 * 124 * Registration 125 * New resource controls can be added to a running instance by loaded modules 126 * via registration. (The current implementation does not support unloadable 127 * modules; this functionality can be added if needed, via an 128 * activation/deactivation interface involving the manipulation of the 129 * ops vector for the resource control(s) needing to support unloading.) 130 * 131 * Control value ordering 132 * Because the rctl_val chain on each rctl must be navigable in a 133 * deterministic way, we have to define an ordering on the rctl_val_t's. The 134 * defined order is (flags & [maximal], value, flags & [deny-action], 135 * privilege). 136 * 137 * Locking 138 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since 139 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point 140 * of any subsystem, holding subsystem locks, it is at all times inappropriate 141 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks. 142 * Traversing any of the various resource control entity lists requires 143 * holding rctl_lists_lock. 144 * 145 * Each individual resource control set associated with an entity must have 146 * its rcs_lock held for the duration of any operations that would add 147 * resource controls or control values to the set. 148 * 149 * The locking subsequence of interest is: p_lock, rctl_dict_lock, 150 * rctl_lists_lock, entity->rcs_lock. 151 * 152 * The projects(4) database and project entity resource controls 153 * A special case is made for RCENTITY_PROJECT values set through the 154 * setproject(3PROJECT) interface. setproject() makes use of a private 155 * interface, setprojrctl(), which passes through an array of resource control 156 * blocks that need to be set while holding the entity->rcs_lock. This 157 * ensures that the act of modifying a project's resource controls is 158 * "atomic" within the kernel. 159 * 160 * Within the rctl sub-system, we provide two interfaces that are only used by 161 * the setprojrctl() code path - rctl_local_insert_all() and 162 * rctl_local_replace_all(). rctl_local_insert_all() will ensure that the 163 * resource values specified in *new_values are applied. 164 * rctl_local_replace_all() will purge the current rctl->rc_projdb and 165 * rctl->rc_values entries, and apply the *new_values. 166 * 167 * These functions modify not only the linked list of active resource controls 168 * (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of 169 * values set through these interfaces. To clarify: 170 * 171 * rctl->rc_values - a linked list of rctl_val_t. These are the active 172 * resource values associated with this rctl, and may have been set by 173 * setrctl() - via prctl(1M), or by setprojrctl() - via 174 * setproject(3PROJECT). 175 * 176 * rctl->rc_projdb - a linked list of rctl_val_t. These reflect the 177 * resource values set by the setprojrctl() code path. rc_projdb is not 178 * referenced by any other component of the rctl sub-system. 179 * 180 * As various locks are held when calling these functions, we ensure that all 181 * the possible memory allocations are performed prior to calling the 182 * function. *alloc_values is a linked list of uninitialized rctl_val_t, 183 * which may be used to duplicate a new resource control value (passed in as 184 * one of the members of the *new_values linked list), in order to populate 185 * rctl->rc_values. 186 */ 187 188 id_t max_rctl_hndl = 32768; 189 int rctl_dict_size = 64; 190 int rctl_set_size = 8; 191 kmutex_t rctl_dict_lock; 192 mod_hash_t *rctl_dict; 193 mod_hash_t *rctl_dict_by_name; 194 id_space_t *rctl_ids; 195 kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */ 196 kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */ 197 198 kmutex_t rctl_lists_lock; 199 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1]; 200 201 /* 202 * Default resource control operations and ops vector 203 * To be used if the particular rcontrol has no specific actions defined, or 204 * if the subsystem providing the control is quiescing (in preparation for 205 * unloading, presumably.) 206 * 207 * Resource controls with callbacks should fill the unused operations with the 208 * appropriate default impotent callback. 209 */ 210 /*ARGSUSED*/ 211 void 212 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e) 213 { 214 } 215 216 /*ARGSUSED*/ 217 rctl_qty_t 218 rcop_no_usage(struct rctl *r, struct proc *p) 219 { 220 return (0); 221 } 222 223 /*ARGSUSED*/ 224 int 225 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l) 226 { 227 return (0); 228 } 229 230 /*ARGSUSED*/ 231 int 232 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 233 struct rctl_val *rv, rctl_qty_t i, uint_t f) 234 { 235 return (0); 236 } 237 238 rctl_ops_t rctl_default_ops = { 239 rcop_no_action, 240 rcop_no_usage, 241 rcop_no_set, 242 rcop_no_test 243 }; 244 245 /* 246 * Default "absolute" resource control operation and ops vector 247 * Useful if there is no usage associated with the 248 * resource control. 249 */ 250 /*ARGSUSED*/ 251 int 252 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 253 struct rctl_val *rv, rctl_qty_t i, uint_t f) 254 { 255 return (i > rv->rcv_value); 256 } 257 258 rctl_ops_t rctl_absolute_ops = { 259 rcop_no_action, 260 rcop_no_usage, 261 rcop_no_set, 262 rcop_absolute_test 263 }; 264 265 /*ARGSUSED*/ 266 static uint_t 267 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 268 { 269 return ((uint_t)(uintptr_t)key % rctl_dict_size); 270 } 271 272 static int 273 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 274 { 275 uint_t u1 = (uint_t)(uintptr_t)key1; 276 uint_t u2 = (uint_t)(uintptr_t)key2; 277 278 if (u1 > u2) 279 return (1); 280 281 if (u1 == u2) 282 return (0); 283 284 return (-1); 285 } 286 287 static void 288 rctl_dict_val_dtor(mod_hash_val_t val) 289 { 290 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 291 292 kmem_free(kr, sizeof (rctl_dict_entry_t)); 293 } 294 295 /* 296 * size_t rctl_build_name_buf() 297 * 298 * Overview 299 * rctl_build_name_buf() walks all active resource controls in the dictionary, 300 * building a buffer of continguous NUL-terminated strings. 301 * 302 * Return values 303 * The size of the buffer is returned, the passed pointer's contents are 304 * modified to that of the location of the buffer. 305 * 306 * Caller's context 307 * Caller must be in a context suitable for KM_SLEEP allocations. 308 */ 309 size_t 310 rctl_build_name_buf(char **rbufp) 311 { 312 size_t req_size, cpy_size; 313 char *rbufloc; 314 int i; 315 316 rctl_rebuild_name_buf: 317 req_size = cpy_size = 0; 318 319 /* 320 * Calculate needed buffer length. 321 */ 322 mutex_enter(&rctl_lists_lock); 323 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 324 rctl_dict_entry_t *rde; 325 326 for (rde = rctl_lists[i]; 327 rde != NULL; 328 rde = rde->rcd_next) 329 req_size += strlen(rde->rcd_name) + 1; 330 } 331 mutex_exit(&rctl_lists_lock); 332 333 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 334 335 /* 336 * Copy rctl names into our buffer. If the copy length exceeds the 337 * allocate length (due to registration changes), stop copying, free the 338 * buffer, and start again. 339 */ 340 mutex_enter(&rctl_lists_lock); 341 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 342 rctl_dict_entry_t *rde; 343 344 for (rde = rctl_lists[i]; 345 rde != NULL; 346 rde = rde->rcd_next) { 347 size_t length = strlen(rde->rcd_name) + 1; 348 349 cpy_size += length; 350 351 if (cpy_size > req_size) { 352 kmem_free(*rbufp, req_size); 353 mutex_exit(&rctl_lists_lock); 354 goto rctl_rebuild_name_buf; 355 } 356 357 bcopy(rde->rcd_name, rbufloc, length); 358 rbufloc += length; 359 } 360 } 361 mutex_exit(&rctl_lists_lock); 362 363 return (req_size); 364 } 365 366 /* 367 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 368 * 369 * Overview 370 * rctl_dict_lookup() returns the resource control dictionary entry for the 371 * named resource control. 372 * 373 * Return values 374 * A pointer to the appropriate resource control dictionary entry, or NULL if 375 * no such named entry exists. 376 * 377 * Caller's context 378 * Caller must not be holding rctl_dict_lock. 379 */ 380 rctl_dict_entry_t * 381 rctl_dict_lookup(const char *name) 382 { 383 rctl_dict_entry_t *rde; 384 385 mutex_enter(&rctl_dict_lock); 386 387 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 388 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 389 mutex_exit(&rctl_dict_lock); 390 return (NULL); 391 } 392 393 mutex_exit(&rctl_dict_lock); 394 395 return (rde); 396 } 397 398 /* 399 * rctl_hndl_t rctl_hndl_lookup(const char *) 400 * 401 * Overview 402 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 403 * named resource control. 404 * 405 * Return values 406 * The appropriate id, or -1 if no such named entry exists. 407 * 408 * Caller's context 409 * Caller must not be holding rctl_dict_lock. 410 */ 411 rctl_hndl_t 412 rctl_hndl_lookup(const char *name) 413 { 414 rctl_dict_entry_t *rde; 415 416 if ((rde = rctl_dict_lookup(name)) == NULL) 417 return (-1); 418 419 return (rde->rcd_id); 420 } 421 422 /* 423 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 424 * 425 * Overview 426 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 427 * the resource control dictionary entry matching a given resource control id. 428 * 429 * Return values 430 * A pointer to the matching resource control dictionary entry, or NULL if the 431 * id does not match any existing entries. 432 * 433 * Caller's context 434 * Caller must not be holding rctl_lists_lock. 435 */ 436 rctl_dict_entry_t * 437 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 438 { 439 uint_t i; 440 441 mutex_enter(&rctl_lists_lock); 442 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 443 rctl_dict_entry_t *rde; 444 445 for (rde = rctl_lists[i]; 446 rde != NULL; 447 rde = rde->rcd_next) 448 if (rde->rcd_id == hndl) { 449 mutex_exit(&rctl_lists_lock); 450 return (rde); 451 } 452 } 453 mutex_exit(&rctl_lists_lock); 454 455 return (NULL); 456 } 457 458 /* 459 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 460 * rctl_priv_t privilege, uint_t action) 461 * 462 * Overview 463 * Create a default limit with specified value, privilege, and action. 464 * 465 * Return value 466 * No value returned. 467 */ 468 void 469 rctl_add_default_limit(const char *name, rctl_qty_t value, 470 rctl_priv_t privilege, uint_t action) 471 { 472 rctl_val_t *dval; 473 rctl_dict_entry_t *rde; 474 475 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 476 bzero(dval, sizeof (rctl_val_t)); 477 dval->rcv_value = value; 478 dval->rcv_privilege = privilege; 479 dval->rcv_flagaction = action; 480 dval->rcv_action_recip_pid = -1; 481 482 rde = rctl_dict_lookup(name); 483 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 484 } 485 486 /* 487 * void rctl_add_legacy_limit(const char *name, const char *mname, 488 * const char *lname, rctl_qty_t dflt) 489 * 490 * Overview 491 * Create a default privileged limit, using the value obtained from 492 * /etc/system if it exists and is greater than the specified default 493 * value. Exists primarily for System V IPC. 494 * 495 * Return value 496 * No value returned. 497 */ 498 void 499 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 500 rctl_qty_t dflt, rctl_qty_t max) 501 { 502 rctl_qty_t qty; 503 504 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 505 qty = dflt; 506 507 if (qty > max) 508 qty = max; 509 510 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 511 } 512 513 static rctl_set_t * 514 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 515 { 516 rctl_set_t *rset = NULL; 517 518 if (rcd == NULL) 519 return (NULL); 520 521 switch (rcd->rcd_entity) { 522 case RCENTITY_PROCESS: 523 rset = p->p_rctls; 524 break; 525 case RCENTITY_TASK: 526 ASSERT(MUTEX_HELD(&p->p_lock)); 527 if (p->p_task != NULL) 528 rset = p->p_task->tk_rctls; 529 break; 530 case RCENTITY_PROJECT: 531 ASSERT(MUTEX_HELD(&p->p_lock)); 532 if (p->p_task != NULL && 533 p->p_task->tk_proj != NULL) 534 rset = p->p_task->tk_proj->kpj_rctls; 535 break; 536 case RCENTITY_ZONE: 537 ASSERT(MUTEX_HELD(&p->p_lock)); 538 if (p->p_zone != NULL) 539 rset = p->p_zone->zone_rctls; 540 break; 541 default: 542 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 543 break; 544 } 545 546 return (rset); 547 } 548 549 static void 550 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 551 rctl_entity_p_t *e) 552 { 553 e->rcep_p.proc = NULL; 554 e->rcep_t = entity; 555 556 switch (entity) { 557 case RCENTITY_PROCESS: 558 e->rcep_p.proc = p; 559 break; 560 case RCENTITY_TASK: 561 ASSERT(MUTEX_HELD(&p->p_lock)); 562 if (p->p_task != NULL) 563 e->rcep_p.task = p->p_task; 564 break; 565 case RCENTITY_PROJECT: 566 ASSERT(MUTEX_HELD(&p->p_lock)); 567 if (p->p_task != NULL && 568 p->p_task->tk_proj != NULL) 569 e->rcep_p.proj = p->p_task->tk_proj; 570 break; 571 case RCENTITY_ZONE: 572 ASSERT(MUTEX_HELD(&p->p_lock)); 573 if (p->p_zone != NULL) 574 e->rcep_p.zone = p->p_zone; 575 break; 576 default: 577 panic("unknown rctl entity type %d seen", entity); 578 break; 579 } 580 } 581 582 static void 583 rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 584 { 585 uint_t i; 586 587 if (rcgp->rcag_nctls > 0) { 588 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 589 rctl_t *rctl = prev; 590 591 rcgp->rcag_ctls = prev; 592 593 for (i = 1; i < rcgp->rcag_nctls; i++) { 594 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 595 prev->rc_next = rctl; 596 prev = rctl; 597 } 598 599 rctl->rc_next = NULL; 600 } 601 602 if (rcgp->rcag_nvals > 0) { 603 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 604 rctl_val_t *rval = prev; 605 606 rcgp->rcag_vals = prev; 607 608 for (i = 1; i < rcgp->rcag_nvals; i++) { 609 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 610 prev->rcv_next = rval; 611 prev = rval; 612 } 613 614 rval->rcv_next = NULL; 615 } 616 617 } 618 619 static rctl_val_t * 620 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 621 { 622 rctl_val_t *rval = rcgp->rcag_vals; 623 624 ASSERT(rcgp->rcag_nvals > 0); 625 rcgp->rcag_nvals--; 626 rcgp->rcag_vals = rval->rcv_next; 627 628 rval->rcv_next = NULL; 629 630 return (rval); 631 } 632 633 static rctl_t * 634 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 635 { 636 rctl_t *rctl = rcgp->rcag_ctls; 637 638 ASSERT(rcgp->rcag_nctls > 0); 639 rcgp->rcag_nctls--; 640 rcgp->rcag_ctls = rctl->rc_next; 641 642 rctl->rc_next = NULL; 643 644 return (rctl); 645 646 } 647 648 static void 649 rctl_gp_free(rctl_alloc_gp_t *rcgp) 650 { 651 rctl_val_t *rval = rcgp->rcag_vals; 652 rctl_t *rctl = rcgp->rcag_ctls; 653 654 while (rval != NULL) { 655 rctl_val_t *next = rval->rcv_next; 656 657 kmem_cache_free(rctl_val_cache, rval); 658 rval = next; 659 } 660 661 while (rctl != NULL) { 662 rctl_t *next = rctl->rc_next; 663 664 kmem_cache_free(rctl_cache, rctl); 665 rctl = next; 666 } 667 } 668 669 /* 670 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 671 * 672 * Overview 673 * Release all unused memory allocated via one of the "prealloc" functions: 674 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 675 * 676 * Return values 677 * None. 678 * 679 * Caller's context 680 * No restrictions on context. 681 */ 682 void 683 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 684 { 685 rctl_gp_free(gp); 686 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 687 } 688 689 /* 690 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 691 * 692 * Overview 693 * This function defines an ordering to rctl_val_t's in order to allow 694 * for correct placement in value lists. When the imprecise flag is set, 695 * the action recipient is ignored. This is to facilitate insert, 696 * delete, and replace operations by rctlsys. 697 * 698 * Return values 699 * 0 if the val_t's are are considered identical 700 * -1 if a is ordered lower than b 701 * 1 if a is lowered higher than b 702 * 703 * Caller's context 704 * No restrictions on context. 705 */ 706 int 707 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 708 { 709 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 710 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 711 return (-1); 712 713 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 714 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 715 return (1); 716 717 if (a->rcv_value < b->rcv_value) 718 return (-1); 719 720 if (a->rcv_value > b->rcv_value) 721 return (1); 722 723 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 724 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 725 return (-1); 726 727 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 728 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 729 return (1); 730 731 if (a->rcv_privilege < b->rcv_privilege) 732 return (-1); 733 734 if (a->rcv_privilege > b->rcv_privilege) 735 return (1); 736 737 if (imprecise) 738 return (0); 739 740 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 741 return (-1); 742 743 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 744 return (1); 745 746 return (0); 747 } 748 749 static rctl_val_t * 750 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 751 { 752 rctl_val_t *rval = *head; 753 754 while (rval != NULL) { 755 if (rctl_val_cmp(cval, rval, 0) == 0) 756 return (rval); 757 758 rval = rval->rcv_next; 759 } 760 761 return (NULL); 762 763 } 764 765 /* 766 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 767 * 768 * Overview 769 * This function inserts the rctl_val_t into the value list provided. 770 * The insert is always successful unless if the value is a duplicate 771 * of one already in the list. 772 * 773 * Return values 774 * 1 if the value was a duplicate of an existing value in the list. 775 * 0 if the insert was successful. 776 */ 777 int 778 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 779 { 780 rctl_val_t *prev; 781 int equiv; 782 783 rval->rcv_next = NULL; 784 rval->rcv_prev = NULL; 785 786 if (*root == NULL) { 787 *root = rval; 788 return (0); 789 } 790 791 equiv = rctl_val_cmp(rval, *root, 0); 792 793 if (equiv == 0) 794 return (1); 795 796 if (equiv < 0) { 797 rval->rcv_next = *root; 798 rval->rcv_next->rcv_prev = rval; 799 *root = rval; 800 801 return (0); 802 } 803 804 prev = *root; 805 while (prev->rcv_next != NULL && 806 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 807 prev = prev->rcv_next; 808 } 809 810 if (equiv == 0) 811 return (1); 812 813 rval->rcv_next = prev->rcv_next; 814 if (rval->rcv_next != NULL) 815 rval->rcv_next->rcv_prev = rval; 816 prev->rcv_next = rval; 817 rval->rcv_prev = prev; 818 819 return (0); 820 } 821 822 static int 823 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 824 { 825 rctl_val_t *prev; 826 827 if (*root == NULL) 828 return (-1); 829 830 prev = *root; 831 if (rctl_val_cmp(rval, prev, 0) == 0) { 832 *root = prev->rcv_next; 833 if (*root != NULL) 834 (*root)->rcv_prev = NULL; 835 836 kmem_cache_free(rctl_val_cache, prev); 837 838 return (0); 839 } 840 841 while (prev->rcv_next != NULL && 842 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 843 prev = prev->rcv_next; 844 } 845 846 if (prev->rcv_next == NULL) { 847 /* 848 * If we navigate the entire list and cannot find a match, then 849 * return failure. 850 */ 851 return (-1); 852 } 853 854 prev = prev->rcv_next; 855 prev->rcv_prev->rcv_next = prev->rcv_next; 856 if (prev->rcv_next != NULL) 857 prev->rcv_next->rcv_prev = prev->rcv_prev; 858 859 kmem_cache_free(rctl_val_cache, prev); 860 861 return (0); 862 } 863 864 static rctl_val_t * 865 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 866 struct proc *newp) 867 { 868 rctl_val_t *head = NULL; 869 870 for (; rval != NULL; rval = rval->rcv_next) { 871 rctl_val_t *dval = rctl_gp_detach_val(ragp); 872 873 bcopy(rval, dval, sizeof (rctl_val_t)); 874 dval->rcv_prev = dval->rcv_next = NULL; 875 876 if (oldp == NULL || 877 rval->rcv_action_recipient == NULL || 878 rval->rcv_action_recipient == oldp) { 879 if (rval->rcv_privilege == RCPRIV_BASIC) { 880 dval->rcv_action_recipient = newp; 881 dval->rcv_action_recip_pid = newp->p_pid; 882 } else { 883 dval->rcv_action_recipient = NULL; 884 dval->rcv_action_recip_pid = -1; 885 } 886 887 (void) rctl_val_list_insert(&head, dval); 888 } else { 889 kmem_cache_free(rctl_val_cache, dval); 890 } 891 } 892 893 return (head); 894 } 895 896 static void 897 rctl_val_list_reset(rctl_val_t *rval) 898 { 899 for (; rval != NULL; rval = rval->rcv_next) 900 rval->rcv_firing_time = 0; 901 } 902 903 static uint_t 904 rctl_val_list_count(rctl_val_t *rval) 905 { 906 uint_t n = 0; 907 908 for (; rval != NULL; rval = rval->rcv_next) 909 n++; 910 911 return (n); 912 } 913 914 915 static void 916 rctl_val_list_free(rctl_val_t *rval) 917 { 918 while (rval != NULL) { 919 rctl_val_t *next = rval->rcv_next; 920 921 kmem_cache_free(rctl_val_cache, rval); 922 923 rval = next; 924 } 925 } 926 927 /* 928 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 929 * 930 * Overview 931 * In cases where the operating system supports more than one process 932 * addressing model, the operating system capabilities will exceed those of 933 * one or more of these models. Processes in a less capable model must have 934 * their resources accurately controlled, without diluting those of their 935 * descendants reached via exec(). rctl_model_maximum() returns the governing 936 * value for the specified process with respect to a resource control, such 937 * that the value can used for the RCTLOP_SET callback or compatability 938 * support. 939 * 940 * Return values 941 * The maximum value for the given process for the specified resource control. 942 * 943 * Caller's context 944 * No restrictions on context. 945 */ 946 rctl_qty_t 947 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 948 { 949 if (p->p_model == DATAMODEL_NATIVE) 950 return (rde->rcd_max_native); 951 952 return (rde->rcd_max_ilp32); 953 } 954 955 /* 956 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 957 * 958 * Overview 959 * Convenience function wrapping the rctl_model_maximum() functionality. 960 * 961 * Return values 962 * The lesser of the process's maximum value and the given value for the 963 * specified resource control. 964 * 965 * Caller's context 966 * No restrictions on context. 967 */ 968 rctl_qty_t 969 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 970 { 971 rctl_qty_t max = rctl_model_maximum(rde, p); 972 973 return (value < max ? value : max); 974 } 975 976 static void 977 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 978 { 979 uint_t index = hndl % rctl_set_size; 980 rctl_t *next_ctl, *prev_ctl; 981 982 ASSERT(MUTEX_HELD(&set->rcs_lock)); 983 984 rctl->rc_next = NULL; 985 986 if (set->rcs_ctls[index] == NULL) { 987 set->rcs_ctls[index] = rctl; 988 return; 989 } 990 991 if (hndl < set->rcs_ctls[index]->rc_id) { 992 rctl->rc_next = set->rcs_ctls[index]; 993 set->rcs_ctls[index] = rctl; 994 995 return; 996 } 997 998 for (next_ctl = set->rcs_ctls[index]->rc_next, 999 prev_ctl = set->rcs_ctls[index]; 1000 next_ctl != NULL; 1001 prev_ctl = next_ctl, 1002 next_ctl = next_ctl->rc_next) { 1003 if (next_ctl->rc_id > hndl) { 1004 rctl->rc_next = next_ctl; 1005 prev_ctl->rc_next = rctl; 1006 1007 return; 1008 } 1009 } 1010 1011 rctl->rc_next = next_ctl; 1012 prev_ctl->rc_next = rctl; 1013 } 1014 1015 /* 1016 * rctl_set_t *rctl_set_create() 1017 * 1018 * Overview 1019 * Create an empty resource control set, suitable for attaching to a 1020 * controlled entity. 1021 * 1022 * Return values 1023 * A pointer to the newly created set. 1024 * 1025 * Caller's context 1026 * Safe for KM_SLEEP allocations. 1027 */ 1028 rctl_set_t * 1029 rctl_set_create() 1030 { 1031 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 1032 1033 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 1034 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1035 KM_SLEEP); 1036 rset->rcs_entity = -1; 1037 1038 return (rset); 1039 } 1040 1041 /* 1042 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1043 * 1044 * Overview 1045 * rctl_set_init_prealloc() examines the globally defined resource controls 1046 * and their default values and returns a resource control allocation group 1047 * populated with sufficient controls and values to form a representative 1048 * resource control set for the specified entity. 1049 * 1050 * Return values 1051 * A pointer to the newly created allocation group. 1052 * 1053 * Caller's context 1054 * Caller must be in a context suitable for KM_SLEEP allocations. 1055 */ 1056 rctl_alloc_gp_t * 1057 rctl_set_init_prealloc(rctl_entity_t entity) 1058 { 1059 rctl_dict_entry_t *rde; 1060 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1061 1062 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1063 1064 if (rctl_lists[entity] == NULL) 1065 return (ragp); 1066 1067 mutex_enter(&rctl_lists_lock); 1068 1069 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1070 ragp->rcag_nctls++; 1071 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1072 } 1073 1074 mutex_exit(&rctl_lists_lock); 1075 1076 rctl_gp_alloc(ragp); 1077 1078 return (ragp); 1079 } 1080 1081 /* 1082 * rctl_set_t *rctl_set_init(rctl_entity_t) 1083 * 1084 * Overview 1085 * rctl_set_create() creates a resource control set, initialized with the 1086 * system infinite values on all registered controls, for attachment to a 1087 * system entity requiring resource controls, such as a process or a task. 1088 * 1089 * Return values 1090 * A pointer to the newly filled set. 1091 * 1092 * Caller's context 1093 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1094 * may modify task and project members based on the proc structure 1095 * they are passed. 1096 */ 1097 rctl_set_t * 1098 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1099 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1100 { 1101 rctl_dict_entry_t *rde; 1102 1103 ASSERT(MUTEX_HELD(&p->p_lock)); 1104 ASSERT(e); 1105 rset->rcs_entity = entity; 1106 1107 if (rctl_lists[entity] == NULL) 1108 return (rset); 1109 1110 mutex_enter(&rctl_lists_lock); 1111 mutex_enter(&rset->rcs_lock); 1112 1113 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1114 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1115 1116 rctl->rc_dict_entry = rde; 1117 rctl->rc_id = rde->rcd_id; 1118 rctl->rc_projdb = NULL; 1119 1120 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1121 ragp, NULL, p); 1122 rctl->rc_cursor = rctl->rc_values; 1123 1124 ASSERT(rctl->rc_cursor != NULL); 1125 1126 rctl_set_insert(rset, rde->rcd_id, rctl); 1127 1128 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1129 rctl->rc_cursor->rcv_value)); 1130 } 1131 1132 mutex_exit(&rset->rcs_lock); 1133 mutex_exit(&rctl_lists_lock); 1134 1135 return (rset); 1136 } 1137 1138 static rctl_t * 1139 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1140 struct proc *newp) 1141 { 1142 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1143 rctl_val_t *dval; 1144 1145 dup->rc_id = rctl->rc_id; 1146 dup->rc_dict_entry = rctl->rc_dict_entry; 1147 dup->rc_next = NULL; 1148 dup->rc_cursor = NULL; 1149 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1150 1151 for (dval = dup->rc_values; 1152 dval != NULL; dval = dval->rcv_next) { 1153 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1154 dup->rc_cursor = dval; 1155 break; 1156 } 1157 } 1158 1159 if (dup->rc_cursor == NULL) 1160 dup->rc_cursor = dup->rc_values; 1161 1162 return (dup); 1163 } 1164 1165 static void 1166 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1167 { 1168 uint_t i; 1169 1170 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1171 1172 for (i = 0; i < rctl_set_size; i++) { 1173 rctl_t *r = set->rcs_ctls[i]; 1174 1175 while (r != NULL) { 1176 ragp->rcag_nctls++; 1177 1178 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1179 1180 r = r->rc_next; 1181 } 1182 } 1183 } 1184 1185 /* 1186 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1187 * 1188 * Overview 1189 * Given a resource control set, allocate a sufficiently large allocation 1190 * group to contain a duplicate of the set. 1191 * 1192 * Return value 1193 * A pointer to the newly created allocation group. 1194 * 1195 * Caller's context 1196 * Safe for KM_SLEEP allocations. 1197 */ 1198 rctl_alloc_gp_t * 1199 rctl_set_dup_prealloc(rctl_set_t *set) 1200 { 1201 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1202 1203 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1204 1205 mutex_enter(&set->rcs_lock); 1206 rctl_set_fill_alloc_gp(set, ragp); 1207 mutex_exit(&set->rcs_lock); 1208 1209 rctl_gp_alloc(ragp); 1210 1211 return (ragp); 1212 } 1213 1214 /* 1215 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1216 * 1217 * Overview 1218 * Verify that the allocation group provided is large enough to allow a 1219 * duplicate of the given resource control set to be constructed from its 1220 * contents. 1221 * 1222 * Return values 1223 * 1 if the allocation group is sufficiently large, 0 otherwise. 1224 * 1225 * Caller's context 1226 * rcs_lock must be held prior to entry. 1227 */ 1228 int 1229 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1230 { 1231 rctl_alloc_gp_t curr_gp; 1232 1233 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1234 1235 rctl_set_fill_alloc_gp(set, &curr_gp); 1236 1237 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1238 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1239 return (1); 1240 1241 return (0); 1242 } 1243 1244 /* 1245 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1246 * rctl_set_t *, rctl_alloc_gp_t *, int) 1247 * 1248 * Overview 1249 * Make a duplicate of the resource control set. The proc pointers are those 1250 * of the owning process and of the process associated with the entity 1251 * receiving the duplicate. 1252 * 1253 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1254 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1255 * Stage 2 consists of copying all rctls and values from the old set into 1256 * the new. Stage 3 completes the duplication by performing the appropriate 1257 * callbacks for each rctl in the new set. 1258 * 1259 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1260 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1261 * be supplied if the newp proc structure reflects the new task and 1262 * project linkage. 1263 * 1264 * Return value 1265 * A pointer to the duplicate set. 1266 * 1267 * Caller's context 1268 * The rcs_lock of the set to be duplicated must be held prior to entry. 1269 */ 1270 rctl_set_t * 1271 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1272 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1273 { 1274 uint_t i; 1275 rctl_set_t *iter; 1276 1277 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1278 ASSERT(e); 1279 /* 1280 * When copying the old set, iterate over that. Otherwise, when 1281 * only callbacks have been requested, iterate over the dup set. 1282 */ 1283 if (flag & RCD_DUP) { 1284 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1285 iter = set; 1286 dup->rcs_entity = set->rcs_entity; 1287 } else { 1288 iter = dup; 1289 } 1290 1291 mutex_enter(&dup->rcs_lock); 1292 1293 for (i = 0; i < rctl_set_size; i++) { 1294 rctl_t *r = iter->rcs_ctls[i]; 1295 rctl_t *d; 1296 1297 while (r != NULL) { 1298 if (flag & RCD_DUP) { 1299 d = rctl_dup(r, ragp, oldp, newp); 1300 rctl_set_insert(dup, r->rc_id, d); 1301 } else { 1302 d = r; 1303 } 1304 1305 if (flag & RCD_CALLBACK) 1306 RCTLOP_SET(d, newp, e, 1307 rctl_model_value(d->rc_dict_entry, newp, 1308 d->rc_cursor->rcv_value)); 1309 1310 r = r->rc_next; 1311 } 1312 } 1313 1314 mutex_exit(&dup->rcs_lock); 1315 1316 return (dup); 1317 } 1318 1319 /* 1320 * void rctl_set_free(rctl_set_t *) 1321 * 1322 * Overview 1323 * Delete resource control set and all attached values. 1324 * 1325 * Return values 1326 * No value returned. 1327 * 1328 * Caller's context 1329 * No restrictions on context. 1330 */ 1331 void 1332 rctl_set_free(rctl_set_t *set) 1333 { 1334 uint_t i; 1335 1336 mutex_enter(&set->rcs_lock); 1337 for (i = 0; i < rctl_set_size; i++) { 1338 rctl_t *r = set->rcs_ctls[i]; 1339 1340 while (r != NULL) { 1341 rctl_val_t *v = r->rc_values; 1342 rctl_t *n = r->rc_next; 1343 1344 kmem_cache_free(rctl_cache, r); 1345 1346 rctl_val_list_free(v); 1347 1348 r = n; 1349 } 1350 } 1351 mutex_exit(&set->rcs_lock); 1352 1353 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1354 kmem_free(set, sizeof (rctl_set_t)); 1355 } 1356 1357 /* 1358 * void rctl_set_reset(rctl_set_t *) 1359 * 1360 * Overview 1361 * Resets all rctls within the set such that the lowest value becomes active. 1362 * 1363 * Return values 1364 * No value returned. 1365 * 1366 * Caller's context 1367 * No restrictions on context. 1368 */ 1369 void 1370 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1371 { 1372 uint_t i; 1373 1374 ASSERT(e); 1375 1376 mutex_enter(&set->rcs_lock); 1377 for (i = 0; i < rctl_set_size; i++) { 1378 rctl_t *r = set->rcs_ctls[i]; 1379 1380 while (r != NULL) { 1381 r->rc_cursor = r->rc_values; 1382 rctl_val_list_reset(r->rc_cursor); 1383 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1384 p, r->rc_cursor->rcv_value)); 1385 1386 ASSERT(r->rc_cursor != NULL); 1387 1388 r = r->rc_next; 1389 } 1390 } 1391 1392 mutex_exit(&set->rcs_lock); 1393 } 1394 1395 /* 1396 * void rctl_set_tearoff(rctl_set *, struct proc *) 1397 * 1398 * Overview 1399 * Tear off any resource control values on this set with an action recipient 1400 * equal to the specified process (as they are becoming invalid with the 1401 * process's departure from this set as an observer). 1402 * 1403 * Return values 1404 * No value returned. 1405 * 1406 * Caller's context 1407 * No restrictions on context 1408 */ 1409 void 1410 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1411 { 1412 uint_t i; 1413 1414 mutex_enter(&set->rcs_lock); 1415 for (i = 0; i < rctl_set_size; i++) { 1416 rctl_t *r = set->rcs_ctls[i]; 1417 1418 while (r != NULL) { 1419 rctl_val_t *rval; 1420 1421 tearoff_rewalk_list: 1422 rval = r->rc_values; 1423 1424 while (rval != NULL) { 1425 if (rval->rcv_privilege == RCPRIV_BASIC && 1426 rval->rcv_action_recipient == p) { 1427 if (r->rc_cursor == rval) 1428 r->rc_cursor = rval->rcv_next; 1429 1430 (void) rctl_val_list_delete( 1431 &r->rc_values, rval); 1432 1433 goto tearoff_rewalk_list; 1434 } 1435 1436 rval = rval->rcv_next; 1437 } 1438 1439 ASSERT(r->rc_cursor != NULL); 1440 1441 r = r->rc_next; 1442 } 1443 } 1444 1445 mutex_exit(&set->rcs_lock); 1446 } 1447 1448 static int 1449 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1450 { 1451 uint_t index = hndl % rctl_set_size; 1452 rctl_t *curr_ctl; 1453 1454 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1455 1456 for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1457 curr_ctl = curr_ctl->rc_next) { 1458 if (curr_ctl->rc_id == hndl) { 1459 *rctl = curr_ctl; 1460 1461 return (0); 1462 } 1463 } 1464 1465 return (-1); 1466 } 1467 1468 /* 1469 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1470 * 1471 * Overview 1472 * Given a process, get the next enforced value on the rctl of the specified 1473 * handle. 1474 * 1475 * Return value 1476 * The enforced value. 1477 * 1478 * Caller's context 1479 * For controls on process collectives, p->p_lock must be held across the 1480 * operation. 1481 */ 1482 /*ARGSUSED*/ 1483 rctl_qty_t 1484 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1485 { 1486 rctl_t *rctl; 1487 rlim64_t ret; 1488 1489 mutex_enter(&rset->rcs_lock); 1490 1491 if (rctl_set_find(rset, hndl, &rctl) == -1) 1492 panic("unknown resource control handle %d requested", hndl); 1493 else 1494 ret = rctl_model_value(rctl->rc_dict_entry, p, 1495 rctl->rc_cursor->rcv_value); 1496 1497 mutex_exit(&rset->rcs_lock); 1498 1499 return (ret); 1500 } 1501 1502 /* 1503 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1504 * 1505 * Overview 1506 * Copy a sanitized version of the global rctl for a given resource control 1507 * name. (By sanitization, we mean that the unsafe data pointers have been 1508 * zeroed.) 1509 * 1510 * Return value 1511 * -1 if name not defined, 0 otherwise. 1512 * 1513 * Caller's context 1514 * No restrictions on context. rctl_dict_lock must not be held. 1515 */ 1516 int 1517 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1518 { 1519 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1520 1521 if (rde == NULL) 1522 return (-1); 1523 1524 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1525 1526 drde->rcd_next = NULL; 1527 drde->rcd_ops = NULL; 1528 1529 return (0); 1530 } 1531 1532 /* 1533 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1534 * 1535 * Overview 1536 * Transfer the settable fields of the named rctl to the global rctl matching 1537 * the given resource control name. 1538 * 1539 * Return value 1540 * -1 if name not defined, 0 otherwise. 1541 * 1542 * Caller's context 1543 * No restrictions on context. rctl_dict_lock must not be held. 1544 */ 1545 int 1546 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1547 { 1548 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1549 1550 if (rde == NULL) 1551 return (-1); 1552 1553 rde->rcd_flagaction = drde->rcd_flagaction; 1554 rde->rcd_syslog_level = drde->rcd_syslog_level; 1555 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1556 1557 return (0); 1558 } 1559 1560 static int 1561 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1562 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1563 rctl_val_t *, rctl_val_t *), struct proc *p) 1564 { 1565 rctl_t *rctl; 1566 rctl_set_t *rset; 1567 rctl_entity_p_t e; 1568 int ret = 0; 1569 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1570 1571 local_op_retry: 1572 1573 ASSERT(MUTEX_HELD(&p->p_lock)); 1574 1575 rset = rctl_entity_obtain_rset(rde, p); 1576 1577 if (rset == NULL) { 1578 return (-1); 1579 } 1580 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1581 1582 mutex_enter(&rset->rcs_lock); 1583 1584 /* using rctl's hndl, get rctl from local set */ 1585 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1586 mutex_exit(&rset->rcs_lock); 1587 return (-1); 1588 } 1589 1590 ret = cbop(hndl, p, &e, rctl, oval, nval); 1591 1592 mutex_exit(&rset->rcs_lock); 1593 return (ret); 1594 } 1595 1596 /*ARGSUSED*/ 1597 static int 1598 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1599 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1600 { 1601 if (oval == NULL) { 1602 /* 1603 * RCTL_FIRST 1604 */ 1605 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1606 } else { 1607 /* 1608 * RCTL_NEXT 1609 */ 1610 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1611 1612 if (tval == NULL) 1613 return (ESRCH); 1614 else if (tval->rcv_next == NULL) 1615 return (ENOENT); 1616 else 1617 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1618 } 1619 1620 return (0); 1621 } 1622 1623 /* 1624 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1625 * 1626 * Overview 1627 * Get the rctl value for the given flags. 1628 * 1629 * Return values 1630 * 0 for successful get, errno otherwise. 1631 */ 1632 int 1633 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1634 struct proc *p) 1635 { 1636 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1637 } 1638 1639 /*ARGSUSED*/ 1640 static int 1641 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1642 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1643 { 1644 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1645 return (ESRCH); 1646 1647 if (rctl->rc_cursor == oval) { 1648 rctl->rc_cursor = oval->rcv_next; 1649 rctl_val_list_reset(rctl->rc_cursor); 1650 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1651 rctl->rc_cursor->rcv_value)); 1652 1653 ASSERT(rctl->rc_cursor != NULL); 1654 } 1655 1656 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1657 1658 return (0); 1659 } 1660 1661 /* 1662 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1663 * 1664 * Overview 1665 * Delete the rctl value for the given flags. 1666 * 1667 * Return values 1668 * 0 for successful delete, errno otherwise. 1669 */ 1670 int 1671 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1672 { 1673 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1674 } 1675 1676 /* 1677 * rctl_local_insert_cb() 1678 * 1679 * Overview 1680 * Insert a new value into the rctl's val list. If an error occurs, 1681 * the val list must be left in the same state as when the function 1682 * was entered. 1683 * 1684 * Return Values 1685 * 0 for successful insert, EINVAL if the value is duplicated in the 1686 * existing list. 1687 */ 1688 /*ARGSUSED*/ 1689 static int 1690 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1691 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1692 { 1693 /* 1694 * Before inserting, confirm there are no duplicates of this value 1695 * and flag level. If there is a duplicate, flag an error and do 1696 * nothing. 1697 */ 1698 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1699 return (EINVAL); 1700 1701 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1702 rctl->rc_cursor = nval; 1703 rctl_val_list_reset(rctl->rc_cursor); 1704 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1705 rctl->rc_cursor->rcv_value)); 1706 1707 ASSERT(rctl->rc_cursor != NULL); 1708 } 1709 1710 return (0); 1711 } 1712 1713 /* 1714 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1715 * 1716 * Overview 1717 * Insert the rctl value into the appropriate rctl set for the calling 1718 * process, given the handle. 1719 */ 1720 int 1721 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1722 { 1723 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1724 } 1725 1726 /* 1727 * rctl_local_insert_all_cb() 1728 * 1729 * Overview 1730 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1731 * 1732 * Inserts new values from the project database (new_values). alloc_values 1733 * should be a linked list of pre-allocated rctl_val_t, which are used to 1734 * populate (rc_projdb). 1735 * 1736 * Should the *new_values linked list match the contents of the rctl's 1737 * rp_projdb then we do nothing. 1738 * 1739 * Return Values 1740 * 0 is always returned. 1741 */ 1742 /*ARGSUSED*/ 1743 static int 1744 rctl_local_insert_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1745 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1746 { 1747 rctl_val_t *val; 1748 rctl_val_t *tmp_val; 1749 rctl_val_t *next; 1750 int modified = 0; 1751 1752 /* 1753 * If this the first time we've set this project rctl, then we delete 1754 * all the privilege values. These privilege values have been set by 1755 * rctl_add_default_limit(). 1756 * 1757 * We save some cycles here by not calling rctl_val_list_delete(). 1758 */ 1759 if (rctl->rc_projdb == NULL) { 1760 val = rctl->rc_values; 1761 1762 while (val != NULL) { 1763 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1764 if (val->rcv_prev != NULL) 1765 val->rcv_prev->rcv_next = val->rcv_next; 1766 else 1767 rctl->rc_values = val->rcv_next; 1768 1769 if (val->rcv_next != NULL) 1770 val->rcv_next->rcv_prev = val->rcv_prev; 1771 1772 tmp_val = val; 1773 val = val->rcv_next; 1774 kmem_cache_free(rctl_val_cache, tmp_val); 1775 } else { 1776 val = val->rcv_next; 1777 } 1778 } 1779 modified = 1; 1780 } 1781 1782 /* 1783 * Delete active values previously set through the project database. 1784 */ 1785 val = rctl->rc_projdb; 1786 1787 while (val != NULL) { 1788 1789 /* Is the old value found in the new values? */ 1790 if (rctl_val_list_find(&new_values, val) == NULL) { 1791 1792 /* 1793 * Delete from the active values if it originated from 1794 * the project database. 1795 */ 1796 if (((tmp_val = rctl_val_list_find(&rctl->rc_values, 1797 val)) != NULL) && 1798 (tmp_val->rcv_flagaction & RCTL_LOCAL_PROJDB)) { 1799 (void) rctl_val_list_delete(&rctl->rc_values, 1800 tmp_val); 1801 } 1802 1803 tmp_val = val->rcv_next; 1804 (void) rctl_val_list_delete(&rctl->rc_projdb, val); 1805 val = tmp_val; 1806 modified = 1; 1807 1808 } else 1809 val = val->rcv_next; 1810 } 1811 1812 /* 1813 * Insert new values from the project database. 1814 */ 1815 while (new_values != NULL) { 1816 next = new_values->rcv_next; 1817 1818 /* 1819 * Insert this new value into the rc_projdb, and duplicate this 1820 * entry to the active list. 1821 */ 1822 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1823 1824 tmp_val = alloc_values->rcv_next; 1825 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1826 alloc_values->rcv_next = tmp_val; 1827 1828 if (rctl_val_list_insert(&rctl->rc_values, 1829 alloc_values) == 0) { 1830 /* inserted move alloc_values on */ 1831 alloc_values = tmp_val; 1832 modified = 1; 1833 } 1834 } else { 1835 /* 1836 * Unlike setrctl() we don't want to return an error on 1837 * a duplicate entry; we are concerned solely with 1838 * ensuring that all the values specified are set. 1839 */ 1840 kmem_cache_free(rctl_val_cache, new_values); 1841 } 1842 new_values = next; 1843 } 1844 1845 /* Teardown any unused rctl_val_t */ 1846 while (alloc_values != NULL) { 1847 tmp_val = alloc_values; 1848 alloc_values = alloc_values->rcv_next; 1849 kmem_cache_free(rctl_val_cache, tmp_val); 1850 } 1851 1852 /* Reset the cursor if rctl values have been modified */ 1853 if (modified) { 1854 rctl->rc_cursor = rctl->rc_values; 1855 rctl_val_list_reset(rctl->rc_cursor); 1856 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1857 rctl->rc_cursor->rcv_value)); 1858 } 1859 1860 return (0); 1861 } 1862 1863 int 1864 rctl_local_insert_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1865 rctl_val_t *alloc_values, struct proc *p) 1866 { 1867 return (rctl_local_op(hndl, new_values, alloc_values, 1868 rctl_local_insert_all_cb, p)); 1869 } 1870 1871 /* 1872 * rctl_local_replace_all_cb() 1873 * 1874 * Overview 1875 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1876 * 1877 * Clears the active rctl values (rc_values), and stored values from the 1878 * previous insertions from the project database (rc_projdb). 1879 * 1880 * Inserts new values from the project database (new_values). alloc_values 1881 * should be a linked list of pre-allocated rctl_val_t, which are used to 1882 * populate (rc_projdb). 1883 * 1884 * Return Values 1885 * 0 is always returned. 1886 */ 1887 /*ARGSUSED*/ 1888 static int 1889 rctl_local_replace_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1890 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1891 { 1892 rctl_val_t *val; 1893 rctl_val_t *next; 1894 rctl_val_t *tmp_val; 1895 1896 /* Delete all the privilege vaules */ 1897 val = rctl->rc_values; 1898 1899 while (val != NULL) { 1900 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1901 if (val->rcv_prev != NULL) 1902 val->rcv_prev->rcv_next = val->rcv_next; 1903 else 1904 rctl->rc_values = val->rcv_next; 1905 1906 if (val->rcv_next != NULL) 1907 val->rcv_next->rcv_prev = val->rcv_prev; 1908 1909 tmp_val = val; 1910 val = val->rcv_next; 1911 kmem_cache_free(rctl_val_cache, tmp_val); 1912 } else { 1913 val = val->rcv_next; 1914 } 1915 } 1916 1917 /* Delete the contents of rc_projdb */ 1918 val = rctl->rc_projdb; 1919 while (val != NULL) { 1920 1921 tmp_val = val; 1922 val = val->rcv_next; 1923 kmem_cache_free(rctl_val_cache, tmp_val); 1924 } 1925 rctl->rc_projdb = NULL; 1926 1927 /* 1928 * Insert new values from the project database. 1929 */ 1930 while (new_values != NULL) { 1931 next = new_values->rcv_next; 1932 1933 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1934 tmp_val = alloc_values->rcv_next; 1935 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1936 alloc_values->rcv_next = tmp_val; 1937 1938 if (rctl_val_list_insert(&rctl->rc_values, 1939 alloc_values) == 0) { 1940 /* inserted, so move alloc_values on */ 1941 alloc_values = tmp_val; 1942 } 1943 } else { 1944 /* 1945 * Unlike setrctl() we don't want to return an error on 1946 * a duplicate entry; we are concerned solely with 1947 * ensuring that all the values specified are set. 1948 */ 1949 kmem_cache_free(rctl_val_cache, new_values); 1950 } 1951 1952 new_values = next; 1953 } 1954 1955 /* Teardown any unused rctl_val_t */ 1956 while (alloc_values != NULL) { 1957 tmp_val = alloc_values; 1958 alloc_values = alloc_values->rcv_next; 1959 kmem_cache_free(rctl_val_cache, tmp_val); 1960 } 1961 1962 /* Always reset the cursor */ 1963 rctl->rc_cursor = rctl->rc_values; 1964 rctl_val_list_reset(rctl->rc_cursor); 1965 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1966 rctl->rc_cursor->rcv_value)); 1967 1968 return (0); 1969 } 1970 1971 int 1972 rctl_local_replace_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1973 rctl_val_t *alloc_values, struct proc *p) 1974 { 1975 return (rctl_local_op(hndl, new_values, alloc_values, 1976 rctl_local_replace_all_cb, p)); 1977 } 1978 1979 static int 1980 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1981 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1982 { 1983 int ret; 1984 rctl_val_t *tmp; 1985 1986 /* Verify that old will be delete-able */ 1987 tmp = rctl_val_list_find(&rctl->rc_values, oval); 1988 if (tmp == NULL) 1989 return (ESRCH); 1990 /* 1991 * Caller should verify that value being deleted is not the 1992 * system value. 1993 */ 1994 ASSERT(tmp->rcv_privilege != RCPRIV_SYSTEM); 1995 1996 /* 1997 * rctl_local_insert_cb() does the job of flagging an error 1998 * for any duplicate values. So, call rctl_local_insert_cb() 1999 * for the new value first, then do deletion of the old value. 2000 * Since this is a callback function to rctl_local_op, we can 2001 * count on rcs_lock being held at this point. This guarantees 2002 * that there is at no point a visible list which contains both 2003 * new and old values. 2004 */ 2005 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 2006 return (ret); 2007 2008 ret = rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval); 2009 ASSERT(ret == 0); 2010 return (0); 2011 } 2012 2013 /* 2014 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 2015 * 2016 * Overview 2017 * Replace the rctl value with a new one. 2018 * 2019 * Return values 2020 * 0 for successful replace, errno otherwise. 2021 */ 2022 int 2023 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 2024 struct proc *p) 2025 { 2026 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 2027 } 2028 2029 /* 2030 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 2031 * 2032 * Overview 2033 * To support rlimit compatibility, we need a function which takes a 64-bit 2034 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2035 * This operation is only intended for legacy rlimits. 2036 */ 2037 int 2038 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 2039 { 2040 rctl_t *rctl; 2041 rctl_val_t *rval; 2042 rctl_set_t *rset = p->p_rctls; 2043 int soft_limit_seen = 0; 2044 int test_for_deny = 1; 2045 2046 mutex_enter(&rset->rcs_lock); 2047 if (rctl_set_find(rset, rc, &rctl) == -1) { 2048 mutex_exit(&rset->rcs_lock); 2049 return (-1); 2050 } 2051 2052 rval = rctl->rc_values; 2053 2054 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 2055 RCTL_GLOBAL_DENY_ALWAYS)) 2056 test_for_deny = 0; 2057 2058 /* 2059 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 2060 */ 2061 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 2062 if (test_for_deny && 2063 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 2064 rval = rval->rcv_next; 2065 continue; 2066 } 2067 2068 /* 2069 * 2. If this is an RCPRIV_BASIC value, then we've found the 2070 * effective soft limit and should set rlim_cur. We should then 2071 * continue looking for another control value with the DENY bit 2072 * set. 2073 */ 2074 if (rval->rcv_privilege == RCPRIV_BASIC) { 2075 if (soft_limit_seen) { 2076 rval = rval->rcv_next; 2077 continue; 2078 } 2079 2080 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2081 rval->rcv_value < rctl_model_maximum( 2082 rctl->rc_dict_entry, p)) 2083 rlp64->rlim_cur = rval->rcv_value; 2084 else 2085 rlp64->rlim_cur = RLIM64_INFINITY; 2086 soft_limit_seen = 1; 2087 2088 rval = rval->rcv_next; 2089 continue; 2090 } 2091 2092 /* 2093 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 2094 * a soft limit candidate, then we've found the effective hard 2095 * and soft limits and should set both If we had found a soft 2096 * limit, then this is only the hard limit and we need only set 2097 * rlim_max. 2098 */ 2099 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2100 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 2101 p)) 2102 rlp64->rlim_max = rval->rcv_value; 2103 else 2104 rlp64->rlim_max = RLIM64_INFINITY; 2105 if (!soft_limit_seen) 2106 rlp64->rlim_cur = rlp64->rlim_max; 2107 2108 mutex_exit(&rset->rcs_lock); 2109 return (0); 2110 } 2111 2112 if (rval == NULL) { 2113 /* 2114 * This control sequence is corrupt, as it is not terminated by 2115 * a system privileged control value. 2116 */ 2117 mutex_exit(&rset->rcs_lock); 2118 return (-1); 2119 } 2120 2121 /* 2122 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 2123 * the soft, if we haven't a soft candidate) should be the value of the 2124 * system control value. 2125 */ 2126 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2127 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 2128 rlp64->rlim_max = rval->rcv_value; 2129 else 2130 rlp64->rlim_max = RLIM64_INFINITY; 2131 2132 if (!soft_limit_seen) 2133 rlp64->rlim_cur = rlp64->rlim_max; 2134 2135 mutex_exit(&rset->rcs_lock); 2136 return (0); 2137 } 2138 2139 /* 2140 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 2141 * 2142 * Overview 2143 * Before making a series of calls to rctl_rlimit_set(), we must have a 2144 * preallocated batch of resource control values, as rctl_rlimit_set() can 2145 * potentially consume two resource control values per call. 2146 * 2147 * Return values 2148 * A populated resource control allocation group with 2n resource control 2149 * values. 2150 * 2151 * Caller's context 2152 * Must be safe for KM_SLEEP allocations. 2153 */ 2154 rctl_alloc_gp_t * 2155 rctl_rlimit_set_prealloc(uint_t n) 2156 { 2157 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 2158 2159 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 2160 2161 gp->rcag_nvals = 2 * n; 2162 2163 rctl_gp_alloc(gp); 2164 2165 return (gp); 2166 } 2167 2168 /* 2169 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 2170 * int) 2171 * 2172 * Overview 2173 * To support rlimit compatibility, we need a function which takes a 64-bit 2174 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2175 * This operation is only intended for legacy rlimits. 2176 * 2177 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 2178 * minimize the number of values placed on the value sequence in various 2179 * cases. Furthermore, we don't allow multiple identical privilege-action 2180 * values on the same sequence. (That is, we don't want a sequence like 2181 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 2182 * memory.) So we want to delete any values with the same privilege value and 2183 * action. 2184 * 2185 * Return values 2186 * 0 for successful set, errno otherwise. Errno will be either EINVAL 2187 * or EPERM, in keeping with defined errnos for ulimit() and setrlimit() 2188 * system calls. 2189 */ 2190 /*ARGSUSED*/ 2191 int 2192 rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64, 2193 rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr) 2194 { 2195 rctl_t *rctl; 2196 rctl_val_t *rval, *rval_priv, *rval_basic; 2197 rctl_set_t *rset = p->p_rctls; 2198 rctl_qty_t max; 2199 rctl_entity_p_t e; 2200 struct rlimit64 cur_rl; 2201 2202 e.rcep_t = RCENTITY_PROCESS; 2203 e.rcep_p.proc = p; 2204 2205 if (rlp64->rlim_cur > rlp64->rlim_max) 2206 return (EINVAL); 2207 2208 if (rctl_rlimit_get(rc, p, &cur_rl) == -1) 2209 return (EINVAL); 2210 2211 /* 2212 * If we are not privileged, we can only lower the hard limit. 2213 */ 2214 if ((rlp64->rlim_max > cur_rl.rlim_max) && 2215 cur_rl.rlim_max != RLIM64_INFINITY && 2216 secpolicy_resource(cr) != 0) 2217 return (EPERM); 2218 2219 mutex_enter(&rset->rcs_lock); 2220 2221 if (rctl_set_find(rset, rc, &rctl) == -1) { 2222 mutex_exit(&rset->rcs_lock); 2223 return (EINVAL); 2224 } 2225 2226 rval_priv = rctl_gp_detach_val(ragp); 2227 2228 rval = rctl->rc_values; 2229 2230 while (rval != NULL) { 2231 rctl_val_t *next = rval->rcv_next; 2232 2233 if (rval->rcv_privilege == RCPRIV_SYSTEM) 2234 break; 2235 2236 if ((rval->rcv_privilege == RCPRIV_BASIC) || 2237 (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) == 2238 (flagaction & ~RCTL_LOCAL_ACTION_MASK)) { 2239 if (rctl->rc_cursor == rval) { 2240 rctl->rc_cursor = rval->rcv_next; 2241 rctl_val_list_reset(rctl->rc_cursor); 2242 RCTLOP_SET(rctl, p, &e, rctl_model_value( 2243 rctl->rc_dict_entry, p, 2244 rctl->rc_cursor->rcv_value)); 2245 } 2246 (void) rctl_val_list_delete(&rctl->rc_values, rval); 2247 } 2248 2249 rval = next; 2250 } 2251 2252 rval_priv->rcv_privilege = RCPRIV_PRIVILEGED; 2253 rval_priv->rcv_flagaction = flagaction; 2254 if (rlp64->rlim_max == RLIM64_INFINITY) { 2255 rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL; 2256 max = rctl->rc_dict_entry->rcd_max_native; 2257 } else { 2258 max = rlp64->rlim_max; 2259 } 2260 rval_priv->rcv_value = max; 2261 rval_priv->rcv_action_signal = signal; 2262 rval_priv->rcv_action_recipient = NULL; 2263 rval_priv->rcv_action_recip_pid = -1; 2264 rval_priv->rcv_firing_time = 0; 2265 rval_priv->rcv_prev = rval_priv->rcv_next = NULL; 2266 2267 (void) rctl_val_list_insert(&rctl->rc_values, rval_priv); 2268 rctl->rc_cursor = rval_priv; 2269 rctl_val_list_reset(rctl->rc_cursor); 2270 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2271 rctl->rc_cursor->rcv_value)); 2272 2273 if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) { 2274 rval_basic = rctl_gp_detach_val(ragp); 2275 2276 rval_basic->rcv_privilege = RCPRIV_BASIC; 2277 rval_basic->rcv_value = rlp64->rlim_cur; 2278 rval_basic->rcv_flagaction = flagaction; 2279 rval_basic->rcv_action_signal = signal; 2280 rval_basic->rcv_action_recipient = p; 2281 rval_basic->rcv_action_recip_pid = p->p_pid; 2282 rval_basic->rcv_firing_time = 0; 2283 rval_basic->rcv_prev = rval_basic->rcv_next = NULL; 2284 2285 (void) rctl_val_list_insert(&rctl->rc_values, rval_basic); 2286 rctl->rc_cursor = rval_basic; 2287 rctl_val_list_reset(rctl->rc_cursor); 2288 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2289 rctl->rc_cursor->rcv_value)); 2290 } 2291 2292 ASSERT(rctl->rc_cursor != NULL); 2293 2294 mutex_exit(&rset->rcs_lock); 2295 return (0); 2296 } 2297 2298 2299 /* 2300 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t, 2301 * rlim64_t, rctl_ops_t *) 2302 * 2303 * Overview 2304 * rctl_register() performs a look-up in the dictionary of rctls 2305 * active on the system; if a rctl of that name is absent, an entry is 2306 * made into the dictionary. The rctl is returned with its reference 2307 * count incremented by one. If the rctl name already exists, we panic. 2308 * (Were the resource control system to support dynamic loading and unloading, 2309 * which it is structured for, duplicate registration should lead to load 2310 * failure instead of panicking.) 2311 * 2312 * Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be 2313 * defined. This limit contains the highest possible value for this quantity 2314 * on the system. Furthermore, the registered control must provide infinite 2315 * values for all applicable address space models supported by the operating 2316 * system. Attempts to set resource control values beyond the system limit 2317 * will fail. 2318 * 2319 * Return values 2320 * The rctl's ID. 2321 * 2322 * Caller's context 2323 * Caller must be in a context suitable for KM_SLEEP allocations. 2324 */ 2325 rctl_hndl_t 2326 rctl_register( 2327 const char *name, 2328 rctl_entity_t entity, 2329 int global_flags, 2330 rlim64_t max_native, 2331 rlim64_t max_ilp32, 2332 rctl_ops_t *ops) 2333 { 2334 rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 2335 rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 2336 rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t), 2337 KM_SLEEP); 2338 rctl_t *old_rctl; 2339 rctl_hndl_t rhndl; 2340 int localflags; 2341 2342 ASSERT(ops != NULL); 2343 2344 bzero(rctl, sizeof (rctl_t)); 2345 bzero(rctl_val, sizeof (rctl_val_t)); 2346 2347 if (global_flags & RCTL_GLOBAL_DENY_NEVER) 2348 localflags = RCTL_LOCAL_MAXIMAL; 2349 else 2350 localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY; 2351 2352 rctl_val->rcv_privilege = RCPRIV_SYSTEM; 2353 rctl_val->rcv_value = max_native; 2354 rctl_val->rcv_flagaction = localflags; 2355 rctl_val->rcv_action_signal = 0; 2356 rctl_val->rcv_action_recipient = NULL; 2357 rctl_val->rcv_action_recip_pid = -1; 2358 rctl_val->rcv_firing_time = 0; 2359 rctl_val->rcv_next = NULL; 2360 rctl_val->rcv_prev = NULL; 2361 2362 rctl_de->rcd_name = (char *)name; 2363 rctl_de->rcd_default_value = rctl_val; 2364 rctl_de->rcd_max_native = max_native; 2365 rctl_de->rcd_max_ilp32 = max_ilp32; 2366 rctl_de->rcd_entity = entity; 2367 rctl_de->rcd_ops = ops; 2368 rctl_de->rcd_flagaction = global_flags; 2369 2370 rctl->rc_dict_entry = rctl_de; 2371 rctl->rc_values = rctl_val; 2372 2373 /* 2374 * 1. Take global lock, validate nonexistence of name, get ID. 2375 */ 2376 mutex_enter(&rctl_dict_lock); 2377 2378 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 2379 (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND) 2380 panic("duplicate registration of rctl %s", name); 2381 2382 rhndl = rctl_de->rcd_id = rctl->rc_id = 2383 (rctl_hndl_t)id_alloc(rctl_ids); 2384 2385 /* 2386 * 2. Insert name-entry pair in rctl_dict_by_name. 2387 */ 2388 if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name, 2389 (mod_hash_val_t)rctl_de)) 2390 panic("unable to insert rctl dict entry for %s (%u)", name, 2391 (uint_t)rctl->rc_id); 2392 2393 /* 2394 * 3. Insert ID-rctl_t * pair in rctl_dict. 2395 */ 2396 if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2397 (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND) 2398 panic("duplicate rctl ID %u registered", rctl->rc_id); 2399 2400 if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2401 (mod_hash_val_t)rctl)) 2402 panic("unable to insert rctl %s/%u (%p)", name, 2403 (uint_t)rctl->rc_id, (void *)rctl); 2404 2405 /* 2406 * 3a. Insert rctl_dict_entry_t * in appropriate entity list. 2407 */ 2408 2409 mutex_enter(&rctl_lists_lock); 2410 2411 switch (entity) { 2412 case RCENTITY_ZONE: 2413 case RCENTITY_PROJECT: 2414 case RCENTITY_TASK: 2415 case RCENTITY_PROCESS: 2416 rctl_de->rcd_next = rctl_lists[entity]; 2417 rctl_lists[entity] = rctl_de; 2418 break; 2419 default: 2420 panic("registering unknown rctl entity %d (%s)", entity, 2421 name); 2422 break; 2423 } 2424 2425 mutex_exit(&rctl_lists_lock); 2426 2427 /* 2428 * 4. Drop lock. 2429 */ 2430 mutex_exit(&rctl_dict_lock); 2431 2432 return (rhndl); 2433 } 2434 2435 /* 2436 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, 2437 * rctl_val_t *v) 2438 * 2439 * Overview 2440 * rctl_global_action() takes, in according with the flags on the rctl_dict 2441 * entry for the given control, the appropriate actions on the exceeded 2442 * control value. Additionally, rctl_global_action() updates the firing time 2443 * on the exceeded value. 2444 * 2445 * Return values 2446 * A bitmask reflecting the actions actually taken. 2447 * 2448 * Caller's context 2449 * No restrictions on context. 2450 */ 2451 /*ARGSUSED*/ 2452 static int 2453 rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v) 2454 { 2455 rctl_dict_entry_t *rde = r->rc_dict_entry; 2456 const char *pr, *en, *idstr; 2457 id_t id; 2458 enum { 2459 SUFFIX_NONE, /* id consumed directly */ 2460 SUFFIX_NUMERIC, /* id consumed in suffix */ 2461 SUFFIX_STRING /* idstr consumed in suffix */ 2462 } suffix = SUFFIX_NONE; 2463 int ret = 0; 2464 2465 v->rcv_firing_time = gethrtime(); 2466 2467 switch (v->rcv_privilege) { 2468 case RCPRIV_BASIC: 2469 pr = "basic"; 2470 break; 2471 case RCPRIV_PRIVILEGED: 2472 pr = "privileged"; 2473 break; 2474 case RCPRIV_SYSTEM: 2475 pr = "system"; 2476 break; 2477 default: 2478 pr = "unknown"; 2479 break; 2480 } 2481 2482 switch (rde->rcd_entity) { 2483 case RCENTITY_PROCESS: 2484 en = "process"; 2485 id = p->p_pid; 2486 suffix = SUFFIX_NONE; 2487 break; 2488 case RCENTITY_TASK: 2489 en = "task"; 2490 id = p->p_task->tk_tkid; 2491 suffix = SUFFIX_NUMERIC; 2492 break; 2493 case RCENTITY_PROJECT: 2494 en = "project"; 2495 id = p->p_task->tk_proj->kpj_id; 2496 suffix = SUFFIX_NUMERIC; 2497 break; 2498 case RCENTITY_ZONE: 2499 en = "zone"; 2500 idstr = p->p_zone->zone_name; 2501 suffix = SUFFIX_STRING; 2502 break; 2503 default: 2504 en = "unknown entity associated with process"; 2505 id = p->p_pid; 2506 suffix = SUFFIX_NONE; 2507 break; 2508 } 2509 2510 if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) { 2511 switch (suffix) { 2512 default: 2513 case SUFFIX_NONE: 2514 (void) strlog(0, 0, 0, 2515 rde->rcd_strlog_flags | log_global.lz_active, 2516 "%s rctl %s (value %llu) exceeded by %s %d.", 2517 pr, rde->rcd_name, v->rcv_value, en, id); 2518 break; 2519 case SUFFIX_NUMERIC: 2520 (void) strlog(0, 0, 0, 2521 rde->rcd_strlog_flags | log_global.lz_active, 2522 "%s rctl %s (value %llu) exceeded by process %d" 2523 " in %s %d.", 2524 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2525 en, id); 2526 break; 2527 case SUFFIX_STRING: 2528 (void) strlog(0, 0, 0, 2529 rde->rcd_strlog_flags | log_global.lz_active, 2530 "%s rctl %s (value %llu) exceeded by process %d" 2531 " in %s %s.", 2532 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2533 en, idstr); 2534 break; 2535 } 2536 } 2537 2538 if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) 2539 ret |= RCT_DENY; 2540 2541 return (ret); 2542 } 2543 2544 static int 2545 rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v, 2546 uint_t safety) 2547 { 2548 int ret = 0; 2549 sigqueue_t *sqp = NULL; 2550 rctl_dict_entry_t *rde = r->rc_dict_entry; 2551 int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE); 2552 2553 proc_t *recipient = v->rcv_action_recipient; 2554 id_t recip_pid = v->rcv_action_recip_pid; 2555 int recip_signal = v->rcv_action_signal; 2556 uint_t flagaction = v->rcv_flagaction; 2557 2558 if (safety == RCA_UNSAFE_ALL) { 2559 if (flagaction & RCTL_LOCAL_DENY) { 2560 ret |= RCT_DENY; 2561 } 2562 return (ret); 2563 } 2564 2565 if (flagaction & RCTL_LOCAL_SIGNAL) { 2566 /* 2567 * We can build a siginfo only in the case that it is 2568 * safe for us to drop p_lock. (For asynchronous 2569 * checks this is currently not true.) 2570 */ 2571 if (safety == RCA_SAFE) { 2572 mutex_exit(&rset->rcs_lock); 2573 mutex_exit(&p->p_lock); 2574 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 2575 mutex_enter(&p->p_lock); 2576 mutex_enter(&rset->rcs_lock); 2577 2578 sqp->sq_info.si_signo = recip_signal; 2579 sqp->sq_info.si_code = SI_RCTL; 2580 sqp->sq_info.si_errno = 0; 2581 sqp->sq_info.si_entity = (int)rde->rcd_entity; 2582 } 2583 2584 if (recipient == NULL || recipient == p) { 2585 ret |= RCT_SIGNAL; 2586 2587 if (sqp == NULL) { 2588 sigtoproc(p, NULL, recip_signal); 2589 } else if (p == curproc) { 2590 /* 2591 * Then this is a synchronous test and we can 2592 * direct the signal at the violating thread. 2593 */ 2594 sigaddqa(curproc, curthread, sqp); 2595 } else { 2596 sigaddqa(p, NULL, sqp); 2597 } 2598 } else if (!unobservable) { 2599 proc_t *rp; 2600 2601 mutex_exit(&rset->rcs_lock); 2602 mutex_exit(&p->p_lock); 2603 2604 mutex_enter(&pidlock); 2605 if ((rp = prfind(recip_pid)) == recipient) { 2606 /* 2607 * Recipient process is still alive, but may not 2608 * be in this task or project any longer. In 2609 * this case, the recipient's resource control 2610 * set pertinent to this control will have 2611 * changed--and we will not deliver the signal, 2612 * as the recipient process is trying to tear 2613 * itself off of its former set. 2614 */ 2615 mutex_enter(&rp->p_lock); 2616 mutex_exit(&pidlock); 2617 2618 if (rctl_entity_obtain_rset(rde, rp) == rset) { 2619 ret |= RCT_SIGNAL; 2620 2621 if (sqp == NULL) 2622 sigtoproc(rp, NULL, 2623 recip_signal); 2624 else 2625 sigaddqa(rp, NULL, sqp); 2626 } else if (sqp) { 2627 kmem_free(sqp, sizeof (sigqueue_t)); 2628 } 2629 mutex_exit(&rp->p_lock); 2630 } else { 2631 mutex_exit(&pidlock); 2632 if (sqp) 2633 kmem_free(sqp, sizeof (sigqueue_t)); 2634 } 2635 2636 mutex_enter(&p->p_lock); 2637 /* 2638 * Since we dropped p_lock, we may no longer be in the 2639 * same task or project as we were at entry. It is thus 2640 * unsafe for us to reacquire the set lock at this 2641 * point; callers of rctl_local_action() must handle 2642 * this possibility. 2643 */ 2644 ret |= RCT_LK_ABANDONED; 2645 } else if (sqp) { 2646 kmem_free(sqp, sizeof (sigqueue_t)); 2647 } 2648 } 2649 2650 if ((flagaction & RCTL_LOCAL_DENY) && 2651 (recipient == NULL || recipient == p)) { 2652 ret |= RCT_DENY; 2653 } 2654 2655 return (ret); 2656 } 2657 2658 /* 2659 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t) 2660 * 2661 * Overview 2662 * Take the action associated with the enforced value (as defined by 2663 * rctl_get_enforced_value()) being exceeded or encountered. Possibly perform 2664 * a restricted subset of the available actions, if circumstances dictate that 2665 * we cannot safely allocate memory (for a sigqueue_t) or guarantee process 2666 * persistence across the duration of the function (an asynchronous action). 2667 * 2668 * Return values 2669 * Actions taken, according to the rctl_test bitmask. 2670 * 2671 * Caller's context 2672 * Safe to acquire rcs_lock. 2673 */ 2674 int 2675 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety) 2676 { 2677 return (rctl_action_entity(hndl, rset, p, NULL, safety)); 2678 } 2679 2680 int 2681 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, 2682 rctl_entity_p_t *e, uint_t safety) 2683 { 2684 int ret = RCT_NONE; 2685 rctl_t *lrctl; 2686 rctl_entity_p_t e_tmp; 2687 2688 rctl_action_acquire: 2689 mutex_enter(&rset->rcs_lock); 2690 if (rctl_set_find(rset, hndl, &lrctl) == -1) { 2691 mutex_exit(&rset->rcs_lock); 2692 return (ret); 2693 } 2694 2695 if (e == NULL) { 2696 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2697 p, &e_tmp); 2698 e = &e_tmp; 2699 } 2700 2701 if ((ret & RCT_LK_ABANDONED) == 0) { 2702 ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor); 2703 2704 RCTLOP_ACTION(lrctl, p, e); 2705 2706 ret |= rctl_local_action(lrctl, rset, p, 2707 lrctl->rc_cursor, safety); 2708 2709 if (ret & RCT_LK_ABANDONED) 2710 goto rctl_action_acquire; 2711 } 2712 2713 ret &= ~RCT_LK_ABANDONED; 2714 2715 if (!(ret & RCT_DENY) && 2716 lrctl->rc_cursor->rcv_next != NULL) { 2717 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2718 2719 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2720 p, lrctl->rc_cursor->rcv_value)); 2721 2722 } 2723 mutex_exit(&rset->rcs_lock); 2724 2725 return (ret); 2726 } 2727 2728 /* 2729 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t) 2730 * 2731 * Overview 2732 * Increment the resource associated with the given handle, returning zero if 2733 * the incremented value does not exceed the threshold for the current limit 2734 * on the resource. 2735 * 2736 * Return values 2737 * Actions taken, according to the rctl_test bitmask. 2738 * 2739 * Caller's context 2740 * p_lock held by caller. 2741 */ 2742 /*ARGSUSED*/ 2743 int 2744 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2745 rctl_qty_t incr, uint_t flags) 2746 { 2747 return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags)); 2748 } 2749 2750 int 2751 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2752 rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags) 2753 { 2754 rctl_t *lrctl; 2755 int ret = RCT_NONE; 2756 rctl_entity_p_t e_tmp; 2757 if (p == &p0) { 2758 /* 2759 * We don't enforce rctls on the kernel itself. 2760 */ 2761 return (ret); 2762 } 2763 2764 rctl_test_acquire: 2765 ASSERT(MUTEX_HELD(&p->p_lock)); 2766 2767 mutex_enter(&rset->rcs_lock); 2768 2769 /* 2770 * Dereference from rctl_set. We don't enforce newly loaded controls 2771 * that haven't been set on this entity (since the only valid value is 2772 * the infinite system value). 2773 */ 2774 if (rctl_set_find(rset, rhndl, &lrctl) == -1) { 2775 mutex_exit(&rset->rcs_lock); 2776 return (ret); 2777 } 2778 2779 /* 2780 * This control is currently unenforced: maximal value on control 2781 * supporting infinitely available resource. 2782 */ 2783 if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) && 2784 (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) { 2785 2786 mutex_exit(&rset->rcs_lock); 2787 return (ret); 2788 } 2789 2790 /* 2791 * If we have been called by rctl_test, look up the entity pointer 2792 * from the proc pointer. 2793 */ 2794 if (e == NULL) { 2795 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2796 p, &e_tmp); 2797 e = &e_tmp; 2798 } 2799 2800 /* 2801 * Get enforced rctl value and current usage. Test the increment 2802 * with the current usage against the enforced value--take action as 2803 * necessary. 2804 */ 2805 while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) { 2806 if ((ret & RCT_LK_ABANDONED) == 0) { 2807 ret |= rctl_global_action(lrctl, rset, p, 2808 lrctl->rc_cursor); 2809 2810 RCTLOP_ACTION(lrctl, p, e); 2811 2812 ret |= rctl_local_action(lrctl, rset, p, 2813 lrctl->rc_cursor, flags); 2814 2815 if (ret & RCT_LK_ABANDONED) 2816 goto rctl_test_acquire; 2817 } 2818 2819 ret &= ~RCT_LK_ABANDONED; 2820 2821 if ((ret & RCT_DENY) == RCT_DENY || 2822 lrctl->rc_cursor->rcv_next == NULL) { 2823 ret |= RCT_DENY; 2824 break; 2825 } 2826 2827 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2828 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2829 p, lrctl->rc_cursor->rcv_value)); 2830 } 2831 2832 mutex_exit(&rset->rcs_lock); 2833 2834 return (ret); 2835 } 2836 2837 /* 2838 * void rctl_init(void) 2839 * 2840 * Overview 2841 * Initialize the rctl subsystem, including the primoridal rctls 2842 * provided by the system. New subsystem-specific rctls should _not_ be 2843 * initialized here. (Do it in your own file.) 2844 * 2845 * Return values 2846 * None. 2847 * 2848 * Caller's context 2849 * Safe for KM_SLEEP allocations. Must be called prior to any process model 2850 * initialization. 2851 */ 2852 void 2853 rctl_init(void) 2854 { 2855 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2856 0, NULL, NULL, NULL, NULL, NULL, 0); 2857 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2858 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2859 2860 rctl_dict = mod_hash_create_extended("rctl_dict", 2861 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2862 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2863 rctl_dict_by_name = mod_hash_create_strhash( 2864 "rctl_handles_by_name", rctl_dict_size, 2865 mod_hash_null_valdtor); 2866 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2867 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2868 2869 rctlproc_init(); 2870 } 2871 2872 /* 2873 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2874 * int chargeproc) 2875 * 2876 * Increments the amount of locked memory on a project, and 2877 * zone. If proj is non-NULL the project must be held by the 2878 * caller; if it is NULL the proj and zone of proc_t p are used. 2879 * If chargeproc is non-zero, then the charged amount is cached 2880 * on p->p_locked_mem so that the charge can be migrated when a 2881 * process changes projects. 2882 * 2883 * Return values 2884 * 0 - success 2885 * EAGAIN - attempting to increment locked memory is denied by one 2886 * or more resource entities. 2887 */ 2888 int 2889 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2890 int chargeproc) 2891 { 2892 kproject_t *projp; 2893 zone_t *zonep; 2894 rctl_entity_p_t e; 2895 int ret = 0; 2896 2897 ASSERT(p != NULL); 2898 ASSERT(MUTEX_HELD(&p->p_lock)); 2899 if (proj != NULL) { 2900 projp = proj; 2901 zonep = proj->kpj_zone; 2902 } else { 2903 projp = p->p_task->tk_proj; 2904 zonep = p->p_zone; 2905 } 2906 2907 mutex_enter(&zonep->zone_mem_lock); 2908 2909 e.rcep_p.proj = projp; 2910 e.rcep_t = RCENTITY_PROJECT; 2911 2912 /* check for overflow */ 2913 if ((projp->kpj_data.kpd_locked_mem + inc) < 2914 projp->kpj_data.kpd_locked_mem) { 2915 ret = EAGAIN; 2916 goto out; 2917 } 2918 if (projp->kpj_data.kpd_locked_mem + inc > 2919 projp->kpj_data.kpd_locked_mem_ctl) { 2920 if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, 2921 p, &e, inc, 0) & RCT_DENY) { 2922 ret = EAGAIN; 2923 goto out; 2924 } 2925 } 2926 e.rcep_p.zone = zonep; 2927 e.rcep_t = RCENTITY_ZONE; 2928 2929 /* Check for overflow */ 2930 if ((zonep->zone_locked_mem + inc) < zonep->zone_locked_mem) { 2931 ret = EAGAIN; 2932 goto out; 2933 } 2934 if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { 2935 if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, 2936 p, &e, inc, 0) & RCT_DENY) { 2937 ret = EAGAIN; 2938 goto out; 2939 } 2940 } 2941 2942 zonep->zone_locked_mem += inc; 2943 projp->kpj_data.kpd_locked_mem += inc; 2944 if (chargeproc != 0) { 2945 p->p_locked_mem += inc; 2946 } 2947 out: 2948 mutex_exit(&zonep->zone_mem_lock); 2949 return (ret); 2950 } 2951 2952 /* 2953 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2954 * int creditproc) 2955 * 2956 * Decrements the amount of locked memory on a project and 2957 * zone. If proj is non-NULL the project must be held by the 2958 * caller; if it is NULL the proj and zone of proc_t p are used. 2959 * If creditproc is non-zero, then the quantity of locked memory 2960 * is subtracted from p->p_locked_mem. 2961 * 2962 * Return values 2963 * none 2964 */ 2965 void 2966 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2967 int creditproc) 2968 { 2969 kproject_t *projp; 2970 zone_t *zonep; 2971 2972 if (proj != NULL) { 2973 projp = proj; 2974 zonep = proj->kpj_zone; 2975 } else { 2976 ASSERT(p != NULL); 2977 ASSERT(MUTEX_HELD(&p->p_lock)); 2978 projp = p->p_task->tk_proj; 2979 zonep = p->p_zone; 2980 } 2981 2982 mutex_enter(&zonep->zone_mem_lock); 2983 zonep->zone_locked_mem -= inc; 2984 projp->kpj_data.kpd_locked_mem -= inc; 2985 if (creditproc != 0) { 2986 ASSERT(p != NULL); 2987 ASSERT(MUTEX_HELD(&p->p_lock)); 2988 p->p_locked_mem -= inc; 2989 } 2990 mutex_exit(&zonep->zone_mem_lock); 2991 } 2992 2993 /* 2994 * rctl_incr_swap(proc_t *, zone_t *, size_t) 2995 * 2996 * Overview 2997 * Increments the swap charge on the specified zone. 2998 * 2999 * Return values 3000 * 0 on success. EAGAIN if swap increment fails due an rctl value 3001 * on the zone. 3002 * 3003 * Callers context 3004 * p_lock held on specified proc. 3005 * swap must be even multiple of PAGESIZE 3006 */ 3007 int 3008 rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap) 3009 { 3010 rctl_entity_p_t e; 3011 3012 ASSERT(MUTEX_HELD(&proc->p_lock)); 3013 ASSERT((swap & PAGEOFFSET) == 0); 3014 e.rcep_p.zone = zone; 3015 e.rcep_t = RCENTITY_ZONE; 3016 3017 mutex_enter(&zone->zone_mem_lock); 3018 3019 /* Check for overflow */ 3020 if ((zone->zone_max_swap + swap) < zone->zone_max_swap) { 3021 mutex_exit(&zone->zone_mem_lock); 3022 return (EAGAIN); 3023 } 3024 if ((zone->zone_max_swap + swap) > 3025 zone->zone_max_swap_ctl) { 3026 3027 if (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls, 3028 proc, &e, swap, 0) & RCT_DENY) { 3029 mutex_exit(&zone->zone_mem_lock); 3030 return (EAGAIN); 3031 } 3032 } 3033 zone->zone_max_swap += swap; 3034 mutex_exit(&zone->zone_mem_lock); 3035 return (0); 3036 } 3037 3038 /* 3039 * rctl_decr_swap(zone_t *, size_t) 3040 * 3041 * Overview 3042 * Decrements the swap charge on the specified zone. 3043 * 3044 * Return values 3045 * None 3046 * 3047 * Callers context 3048 * swap must be even multiple of PAGESIZE 3049 */ 3050 void 3051 rctl_decr_swap(zone_t *zone, size_t swap) 3052 { 3053 ASSERT((swap & PAGEOFFSET) == 0); 3054 mutex_enter(&zone->zone_mem_lock); 3055 ASSERT(zone->zone_max_swap >= swap); 3056 zone->zone_max_swap -= swap; 3057 mutex_exit(&zone->zone_mem_lock); 3058 } 3059 3060 /* 3061 * Create resource kstat 3062 */ 3063 static kstat_t * 3064 rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class, 3065 uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid) 3066 { 3067 kstat_t *ksp = NULL; 3068 char name[KSTAT_STRLEN]; 3069 3070 (void) snprintf(name, KSTAT_STRLEN, "%s_%d", ks_name, ks_instance); 3071 3072 if ((ksp = kstat_create_zone("caps", ks_zoneid, 3073 name, ks_class, ks_type, 3074 ks_ndata, ks_flags, ks_zoneid)) != NULL) { 3075 if (ks_zoneid != GLOBAL_ZONEID) 3076 kstat_zone_add(ksp, GLOBAL_ZONEID); 3077 } 3078 return (ksp); 3079 } 3080 3081 /* 3082 * Create zone-specific resource kstat 3083 */ 3084 kstat_t * 3085 rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type, 3086 uint_t ks_ndata, uchar_t ks_flags) 3087 { 3088 char name[KSTAT_STRLEN]; 3089 3090 (void) snprintf(name, KSTAT_STRLEN, "%s_zone", ks_name); 3091 3092 return (rctl_kstat_create_common(name, zone->zone_id, "zone_caps", 3093 ks_type, ks_ndata, ks_flags, zone->zone_id)); 3094 } 3095 3096 /* 3097 * Create project-specific resource kstat 3098 */ 3099 kstat_t * 3100 rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type, 3101 uint_t ks_ndata, uchar_t ks_flags) 3102 { 3103 char name[KSTAT_STRLEN]; 3104 3105 (void) snprintf(name, KSTAT_STRLEN, "%s_project", ks_name); 3106 3107 return (rctl_kstat_create_common(name, kpj->kpj_id, "project_caps", 3108 ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid)); 3109 } 3110