1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/atomic.h> 26 #include <sys/cmn_err.h> 27 #include <sys/id_space.h> 28 #include <sys/kmem.h> 29 #include <sys/kstat.h> 30 #include <sys/log.h> 31 #include <sys/modctl.h> 32 #include <sys/modhash.h> 33 #include <sys/mutex.h> 34 #include <sys/proc.h> 35 #include <sys/procset.h> 36 #include <sys/project.h> 37 #include <sys/resource.h> 38 #include <sys/rctl.h> 39 #include <sys/siginfo.h> 40 #include <sys/strlog.h> 41 #include <sys/systm.h> 42 #include <sys/task.h> 43 #include <sys/types.h> 44 #include <sys/policy.h> 45 #include <sys/zone.h> 46 47 /* 48 * Resource controls (rctls) 49 * 50 * The rctl subsystem provides a mechanism for kernel components to 51 * register their individual resource controls with the system as a whole, 52 * such that those controls can subscribe to specific actions while being 53 * associated with the various process-model entities provided by the kernel: 54 * the process, the task, the project, and the zone. 
(In principle, only 55 * minor modifications would be required to connect the resource control 56 * functionality to non-process-model entities associated with the system.) 57 * 58 * Subsystems register their rctls via rctl_register(). Subsystems 59 * also wishing to provide additional limits on a given rctl can modify 60 * them once they have the rctl handle. Each subsystem should store the 61 * handle to their rctl for direct access. 62 * 63 * A primary dictionary, rctl_dict, contains a hash of id to the default 64 * control definition for each controlled resource-entity pair on the system. 65 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 66 * resource control handles. The resource control handles are distributed by 67 * the rctl_ids ID space. The handles are private and not to be 68 * advertised to userland; all userland interactions are via the rctl 69 * names. 70 * 71 * Entities inherit their rctls from their predecessor. Since projects have 72 * no ancestor, they inherit their rctls from the rctl dict for project 73 * rctls. It is expected that project controls will be set to their 74 * appropriate values shortly after project creation, presumably from a 75 * policy source such as the project database. 76 * 77 * Data structures 78 * The rctl_set_t attached to each of the process model entities is a simple 79 * hash table keyed on the rctl handle assigned at registration. 
The entries 80 * in the hash table are rctl_t's, whose relationship with the active control 81 * values on that resource and with the global state of the resource we 82 * illustrate below: 83 * 84 * rctl_dict[key] --> rctl_dict_entry 85 * ^ 86 * | 87 * +--+---+ 88 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL 89 * +--+---+ ^ 90 * | | 91 * +------- cursor ------+ 92 * 93 * That is, the rctl contains a back pointer to the global resource control 94 * state for this resource, which is also available in the rctl_dict hash 95 * table mentioned earlier. The rctl contains two pointers to resource 96 * control values: one, values, indicates the entire sequence of control 97 * values; the other, cursor, indicates the currently active control 98 * value--the next value to be enforced. The value list itself is an open, 99 * doubly-linked list, the last non-NULL member of which is the system value 100 * for that resource (being the theoretical/conventional maximum allowable 101 * value for the resource on this OS instance). 102 * 103 * Ops Vector 104 * Subsystems publishing rctls need not provide instances of all of the 105 * functions specified by the ops vector. In particular, if general 106 * rctl_*() entry points are not being called, certain functions can be 107 * omitted. These align as follows: 108 * 109 * rctl_set() 110 * You may wish to provide a set callback if locking circumstances prevent 111 * it or if the performance cost of requesting the enforced value from the 112 * resource control is prohibitively expensive. For instance, the currently 113 * enforced file size limit is stored on the process in the p_fsz_ctl to 114 * maintain read()/write() performance. 115 * 116 * rctl_test() 117 * You must provide a test callback if you are using the rctl_test() 118 * interface. An action callback is optional. 119 * 120 * rctl_action() 121 * You may wish to provide an action callback. 
122 * 123 * Registration 124 * New resource controls can be added to a running instance by loaded modules 125 * via registration. (The current implementation does not support unloadable 126 * modules; this functionality can be added if needed, via an 127 * activation/deactivation interface involving the manipulation of the 128 * ops vector for the resource control(s) needing to support unloading.) 129 * 130 * Control value ordering 131 * Because the rctl_val chain on each rctl must be navigable in a 132 * deterministic way, we have to define an ordering on the rctl_val_t's. The 133 * defined order is (flags & [maximal], value, flags & [deny-action], 134 * privilege). 135 * 136 * Locking 137 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since 138 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point 139 * of any subsystem, holding subsystem locks, it is at all times inappropriate 140 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks. 141 * Traversing any of the various resource control entity lists requires 142 * holding rctl_lists_lock. 143 * 144 * Each individual resource control set associated with an entity must have 145 * its rcs_lock held for the duration of any operations that would add 146 * resource controls or control values to the set. 147 * 148 * The locking subsequence of interest is: p_lock, rctl_dict_lock, 149 * rctl_lists_lock, entity->rcs_lock. 150 * 151 * The projects(4) database and project entity resource controls 152 * A special case is made for RCENTITY_PROJECT values set through the 153 * setproject(3PROJECT) interface. setproject() makes use of a private 154 * interface, setprojrctl(), which passes through an array of resource control 155 * blocks that need to be set while holding the entity->rcs_lock. This 156 * ensures that the act of modifying a project's resource controls is 157 * "atomic" within the kernel. 
158 * 159 * Within the rctl sub-system, we provide two interfaces that are only used by 160 * the setprojrctl() code path - rctl_local_insert_all() and 161 * rctl_local_replace_all(). rctl_local_insert_all() will ensure that the 162 * resource values specified in *new_values are applied. 163 * rctl_local_replace_all() will purge the current rctl->rc_projdb and 164 * rctl->rc_values entries, and apply the *new_values. 165 * 166 * These functions modify not only the linked list of active resource controls 167 * (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of 168 * values set through these interfaces. To clarify: 169 * 170 * rctl->rc_values - a linked list of rctl_val_t. These are the active 171 * resource values associated with this rctl, and may have been set by 172 * setrctl() - via prctl(1M), or by setprojrctl() - via 173 * setproject(3PROJECT). 174 * 175 * rctl->rc_projdb - a linked list of rctl_val_t. These reflect the 176 * resource values set by the setprojrctl() code path. rc_projdb is not 177 * referenced by any other component of the rctl sub-system. 178 * 179 * As various locks are held when calling these functions, we ensure that all 180 * the possible memory allocations are performed prior to calling the 181 * function. *alloc_values is a linked list of uninitialized rctl_val_t, 182 * which may be used to duplicate a new resource control value (passed in as 183 * one of the members of the *new_values linked list), in order to populate 184 * rctl->rc_values. 
185 */ 186 187 id_t max_rctl_hndl = 32768; 188 int rctl_dict_size = 64; 189 int rctl_set_size = 8; 190 kmutex_t rctl_dict_lock; 191 mod_hash_t *rctl_dict; 192 mod_hash_t *rctl_dict_by_name; 193 id_space_t *rctl_ids; 194 kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */ 195 kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */ 196 197 kmutex_t rctl_lists_lock; 198 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1]; 199 200 /* 201 * Default resource control operations and ops vector 202 * To be used if the particular rcontrol has no specific actions defined, or 203 * if the subsystem providing the control is quiescing (in preparation for 204 * unloading, presumably.) 205 * 206 * Resource controls with callbacks should fill the unused operations with the 207 * appropriate default impotent callback. 208 */ 209 /*ARGSUSED*/ 210 void 211 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e) 212 { 213 } 214 215 /*ARGSUSED*/ 216 rctl_qty_t 217 rcop_no_usage(struct rctl *r, struct proc *p) 218 { 219 return (0); 220 } 221 222 /*ARGSUSED*/ 223 int 224 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l) 225 { 226 return (0); 227 } 228 229 /*ARGSUSED*/ 230 int 231 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 232 struct rctl_val *rv, rctl_qty_t i, uint_t f) 233 { 234 return (0); 235 } 236 237 rctl_ops_t rctl_default_ops = { 238 rcop_no_action, 239 rcop_no_usage, 240 rcop_no_set, 241 rcop_no_test 242 }; 243 244 /* 245 * Default "absolute" resource control operation and ops vector 246 * Useful if there is no usage associated with the 247 * resource control. 
248 */ 249 /*ARGSUSED*/ 250 int 251 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 252 struct rctl_val *rv, rctl_qty_t i, uint_t f) 253 { 254 return (i > rv->rcv_value); 255 } 256 257 rctl_ops_t rctl_absolute_ops = { 258 rcop_no_action, 259 rcop_no_usage, 260 rcop_no_set, 261 rcop_absolute_test 262 }; 263 264 /*ARGSUSED*/ 265 static uint_t 266 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 267 { 268 return ((uint_t)(uintptr_t)key % rctl_dict_size); 269 } 270 271 static int 272 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 273 { 274 uint_t u1 = (uint_t)(uintptr_t)key1; 275 uint_t u2 = (uint_t)(uintptr_t)key2; 276 277 if (u1 > u2) 278 return (1); 279 280 if (u1 == u2) 281 return (0); 282 283 return (-1); 284 } 285 286 static void 287 rctl_dict_val_dtor(mod_hash_val_t val) 288 { 289 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 290 291 kmem_free(kr, sizeof (rctl_dict_entry_t)); 292 } 293 294 /* 295 * size_t rctl_build_name_buf() 296 * 297 * Overview 298 * rctl_build_name_buf() walks all active resource controls in the dictionary, 299 * building a buffer of continguous NUL-terminated strings. 300 * 301 * Return values 302 * The size of the buffer is returned, the passed pointer's contents are 303 * modified to that of the location of the buffer. 304 * 305 * Caller's context 306 * Caller must be in a context suitable for KM_SLEEP allocations. 307 */ 308 size_t 309 rctl_build_name_buf(char **rbufp) 310 { 311 size_t req_size, cpy_size; 312 char *rbufloc; 313 int i; 314 315 rctl_rebuild_name_buf: 316 req_size = cpy_size = 0; 317 318 /* 319 * Calculate needed buffer length. 
320 */ 321 mutex_enter(&rctl_lists_lock); 322 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 323 rctl_dict_entry_t *rde; 324 325 for (rde = rctl_lists[i]; 326 rde != NULL; 327 rde = rde->rcd_next) 328 req_size += strlen(rde->rcd_name) + 1; 329 } 330 mutex_exit(&rctl_lists_lock); 331 332 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 333 334 /* 335 * Copy rctl names into our buffer. If the copy length exceeds the 336 * allocate length (due to registration changes), stop copying, free the 337 * buffer, and start again. 338 */ 339 mutex_enter(&rctl_lists_lock); 340 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 341 rctl_dict_entry_t *rde; 342 343 for (rde = rctl_lists[i]; 344 rde != NULL; 345 rde = rde->rcd_next) { 346 size_t length = strlen(rde->rcd_name) + 1; 347 348 cpy_size += length; 349 350 if (cpy_size > req_size) { 351 kmem_free(*rbufp, req_size); 352 mutex_exit(&rctl_lists_lock); 353 goto rctl_rebuild_name_buf; 354 } 355 356 bcopy(rde->rcd_name, rbufloc, length); 357 rbufloc += length; 358 } 359 } 360 mutex_exit(&rctl_lists_lock); 361 362 return (req_size); 363 } 364 365 /* 366 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 367 * 368 * Overview 369 * rctl_dict_lookup() returns the resource control dictionary entry for the 370 * named resource control. 371 * 372 * Return values 373 * A pointer to the appropriate resource control dictionary entry, or NULL if 374 * no such named entry exists. 375 * 376 * Caller's context 377 * Caller must not be holding rctl_dict_lock. 
378 */ 379 rctl_dict_entry_t * 380 rctl_dict_lookup(const char *name) 381 { 382 rctl_dict_entry_t *rde; 383 384 mutex_enter(&rctl_dict_lock); 385 386 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 387 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 388 mutex_exit(&rctl_dict_lock); 389 return (NULL); 390 } 391 392 mutex_exit(&rctl_dict_lock); 393 394 return (rde); 395 } 396 397 /* 398 * rctl_hndl_t rctl_hndl_lookup(const char *) 399 * 400 * Overview 401 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 402 * named resource control. 403 * 404 * Return values 405 * The appropriate id, or -1 if no such named entry exists. 406 * 407 * Caller's context 408 * Caller must not be holding rctl_dict_lock. 409 */ 410 rctl_hndl_t 411 rctl_hndl_lookup(const char *name) 412 { 413 rctl_dict_entry_t *rde; 414 415 if ((rde = rctl_dict_lookup(name)) == NULL) 416 return (-1); 417 418 return (rde->rcd_id); 419 } 420 421 /* 422 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 423 * 424 * Overview 425 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 426 * the resource control dictionary entry matching a given resource control id. 427 * 428 * Return values 429 * A pointer to the matching resource control dictionary entry, or NULL if the 430 * id does not match any existing entries. 431 * 432 * Caller's context 433 * Caller must not be holding rctl_lists_lock. 
434 */ 435 rctl_dict_entry_t * 436 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 437 { 438 uint_t i; 439 440 mutex_enter(&rctl_lists_lock); 441 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 442 rctl_dict_entry_t *rde; 443 444 for (rde = rctl_lists[i]; 445 rde != NULL; 446 rde = rde->rcd_next) 447 if (rde->rcd_id == hndl) { 448 mutex_exit(&rctl_lists_lock); 449 return (rde); 450 } 451 } 452 mutex_exit(&rctl_lists_lock); 453 454 return (NULL); 455 } 456 457 /* 458 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 459 * rctl_priv_t privilege, uint_t action) 460 * 461 * Overview 462 * Create a default limit with specified value, privilege, and action. 463 * 464 * Return value 465 * No value returned. 466 */ 467 void 468 rctl_add_default_limit(const char *name, rctl_qty_t value, 469 rctl_priv_t privilege, uint_t action) 470 { 471 rctl_val_t *dval; 472 rctl_dict_entry_t *rde; 473 474 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 475 bzero(dval, sizeof (rctl_val_t)); 476 dval->rcv_value = value; 477 dval->rcv_privilege = privilege; 478 dval->rcv_flagaction = action; 479 dval->rcv_action_recip_pid = -1; 480 481 rde = rctl_dict_lookup(name); 482 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 483 } 484 485 /* 486 * void rctl_add_legacy_limit(const char *name, const char *mname, 487 * const char *lname, rctl_qty_t dflt) 488 * 489 * Overview 490 * Create a default privileged limit, using the value obtained from 491 * /etc/system if it exists and is greater than the specified default 492 * value. Exists primarily for System V IPC. 493 * 494 * Return value 495 * No value returned. 
496 */ 497 void 498 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 499 rctl_qty_t dflt, rctl_qty_t max) 500 { 501 rctl_qty_t qty; 502 503 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 504 qty = dflt; 505 506 if (qty > max) 507 qty = max; 508 509 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 510 } 511 512 rctl_set_t * 513 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 514 { 515 rctl_set_t *rset = NULL; 516 517 if (rcd == NULL) 518 return (NULL); 519 520 switch (rcd->rcd_entity) { 521 case RCENTITY_PROCESS: 522 rset = p->p_rctls; 523 break; 524 case RCENTITY_TASK: 525 ASSERT(MUTEX_HELD(&p->p_lock)); 526 if (p->p_task != NULL) 527 rset = p->p_task->tk_rctls; 528 break; 529 case RCENTITY_PROJECT: 530 ASSERT(MUTEX_HELD(&p->p_lock)); 531 if (p->p_task != NULL && 532 p->p_task->tk_proj != NULL) 533 rset = p->p_task->tk_proj->kpj_rctls; 534 break; 535 case RCENTITY_ZONE: 536 ASSERT(MUTEX_HELD(&p->p_lock)); 537 if (p->p_zone != NULL) 538 rset = p->p_zone->zone_rctls; 539 break; 540 default: 541 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 542 break; 543 } 544 545 return (rset); 546 } 547 548 static void 549 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 550 rctl_entity_p_t *e) 551 { 552 e->rcep_p.proc = NULL; 553 e->rcep_t = entity; 554 555 switch (entity) { 556 case RCENTITY_PROCESS: 557 e->rcep_p.proc = p; 558 break; 559 case RCENTITY_TASK: 560 ASSERT(MUTEX_HELD(&p->p_lock)); 561 if (p->p_task != NULL) 562 e->rcep_p.task = p->p_task; 563 break; 564 case RCENTITY_PROJECT: 565 ASSERT(MUTEX_HELD(&p->p_lock)); 566 if (p->p_task != NULL && 567 p->p_task->tk_proj != NULL) 568 e->rcep_p.proj = p->p_task->tk_proj; 569 break; 570 case RCENTITY_ZONE: 571 ASSERT(MUTEX_HELD(&p->p_lock)); 572 if (p->p_zone != NULL) 573 e->rcep_p.zone = p->p_zone; 574 break; 575 default: 576 panic("unknown rctl entity type %d seen", entity); 577 break; 578 } 579 } 580 581 static void 582 
rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 583 { 584 uint_t i; 585 586 if (rcgp->rcag_nctls > 0) { 587 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 588 rctl_t *rctl = prev; 589 590 rcgp->rcag_ctls = prev; 591 592 for (i = 1; i < rcgp->rcag_nctls; i++) { 593 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 594 prev->rc_next = rctl; 595 prev = rctl; 596 } 597 598 rctl->rc_next = NULL; 599 } 600 601 if (rcgp->rcag_nvals > 0) { 602 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 603 rctl_val_t *rval = prev; 604 605 rcgp->rcag_vals = prev; 606 607 for (i = 1; i < rcgp->rcag_nvals; i++) { 608 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 609 prev->rcv_next = rval; 610 prev = rval; 611 } 612 613 rval->rcv_next = NULL; 614 } 615 616 } 617 618 static rctl_val_t * 619 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 620 { 621 rctl_val_t *rval = rcgp->rcag_vals; 622 623 ASSERT(rcgp->rcag_nvals > 0); 624 rcgp->rcag_nvals--; 625 rcgp->rcag_vals = rval->rcv_next; 626 627 rval->rcv_next = NULL; 628 629 return (rval); 630 } 631 632 static rctl_t * 633 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 634 { 635 rctl_t *rctl = rcgp->rcag_ctls; 636 637 ASSERT(rcgp->rcag_nctls > 0); 638 rcgp->rcag_nctls--; 639 rcgp->rcag_ctls = rctl->rc_next; 640 641 rctl->rc_next = NULL; 642 643 return (rctl); 644 645 } 646 647 static void 648 rctl_gp_free(rctl_alloc_gp_t *rcgp) 649 { 650 rctl_val_t *rval = rcgp->rcag_vals; 651 rctl_t *rctl = rcgp->rcag_ctls; 652 653 while (rval != NULL) { 654 rctl_val_t *next = rval->rcv_next; 655 656 kmem_cache_free(rctl_val_cache, rval); 657 rval = next; 658 } 659 660 while (rctl != NULL) { 661 rctl_t *next = rctl->rc_next; 662 663 kmem_cache_free(rctl_cache, rctl); 664 rctl = next; 665 } 666 } 667 668 /* 669 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 670 * 671 * Overview 672 * Release all unused memory allocated via one of the "prealloc" functions: 673 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 
674 * 675 * Return values 676 * None. 677 * 678 * Caller's context 679 * No restrictions on context. 680 */ 681 void 682 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 683 { 684 rctl_gp_free(gp); 685 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 686 } 687 688 /* 689 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 690 * 691 * Overview 692 * This function defines an ordering to rctl_val_t's in order to allow 693 * for correct placement in value lists. When the imprecise flag is set, 694 * the action recipient is ignored. This is to facilitate insert, 695 * delete, and replace operations by rctlsys. 696 * 697 * Return values 698 * 0 if the val_t's are are considered identical 699 * -1 if a is ordered lower than b 700 * 1 if a is lowered higher than b 701 * 702 * Caller's context 703 * No restrictions on context. 704 */ 705 int 706 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 707 { 708 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 709 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 710 return (-1); 711 712 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 713 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 714 return (1); 715 716 if (a->rcv_value < b->rcv_value) 717 return (-1); 718 719 if (a->rcv_value > b->rcv_value) 720 return (1); 721 722 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 723 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 724 return (-1); 725 726 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 727 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 728 return (1); 729 730 if (a->rcv_privilege < b->rcv_privilege) 731 return (-1); 732 733 if (a->rcv_privilege > b->rcv_privilege) 734 return (1); 735 736 if (imprecise) 737 return (0); 738 739 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 740 return (-1); 741 742 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 743 return (1); 744 745 return (0); 746 } 747 748 static rctl_val_t * 749 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 750 { 751 rctl_val_t *rval = *head; 752 753 while (rval != NULL) { 754 if 
(rctl_val_cmp(cval, rval, 0) == 0) 755 return (rval); 756 757 rval = rval->rcv_next; 758 } 759 760 return (NULL); 761 762 } 763 764 /* 765 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 766 * 767 * Overview 768 * This function inserts the rctl_val_t into the value list provided. 769 * The insert is always successful unless if the value is a duplicate 770 * of one already in the list. 771 * 772 * Return values 773 * 1 if the value was a duplicate of an existing value in the list. 774 * 0 if the insert was successful. 775 */ 776 int 777 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 778 { 779 rctl_val_t *prev; 780 int equiv; 781 782 rval->rcv_next = NULL; 783 rval->rcv_prev = NULL; 784 785 if (*root == NULL) { 786 *root = rval; 787 return (0); 788 } 789 790 equiv = rctl_val_cmp(rval, *root, 0); 791 792 if (equiv == 0) 793 return (1); 794 795 if (equiv < 0) { 796 rval->rcv_next = *root; 797 rval->rcv_next->rcv_prev = rval; 798 *root = rval; 799 800 return (0); 801 } 802 803 prev = *root; 804 while (prev->rcv_next != NULL && 805 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 806 prev = prev->rcv_next; 807 } 808 809 if (equiv == 0) 810 return (1); 811 812 rval->rcv_next = prev->rcv_next; 813 if (rval->rcv_next != NULL) 814 rval->rcv_next->rcv_prev = rval; 815 prev->rcv_next = rval; 816 rval->rcv_prev = prev; 817 818 return (0); 819 } 820 821 static int 822 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 823 { 824 rctl_val_t *prev; 825 826 if (*root == NULL) 827 return (-1); 828 829 prev = *root; 830 if (rctl_val_cmp(rval, prev, 0) == 0) { 831 *root = prev->rcv_next; 832 if (*root != NULL) 833 (*root)->rcv_prev = NULL; 834 835 kmem_cache_free(rctl_val_cache, prev); 836 837 return (0); 838 } 839 840 while (prev->rcv_next != NULL && 841 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 842 prev = prev->rcv_next; 843 } 844 845 if (prev->rcv_next == NULL) { 846 /* 847 * If we navigate the entire list and cannot find a match, then 848 * 
return failure. 849 */ 850 return (-1); 851 } 852 853 prev = prev->rcv_next; 854 prev->rcv_prev->rcv_next = prev->rcv_next; 855 if (prev->rcv_next != NULL) 856 prev->rcv_next->rcv_prev = prev->rcv_prev; 857 858 kmem_cache_free(rctl_val_cache, prev); 859 860 return (0); 861 } 862 863 static rctl_val_t * 864 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 865 struct proc *newp) 866 { 867 rctl_val_t *head = NULL; 868 869 for (; rval != NULL; rval = rval->rcv_next) { 870 rctl_val_t *dval = rctl_gp_detach_val(ragp); 871 872 bcopy(rval, dval, sizeof (rctl_val_t)); 873 dval->rcv_prev = dval->rcv_next = NULL; 874 875 if (oldp == NULL || 876 rval->rcv_action_recipient == NULL || 877 rval->rcv_action_recipient == oldp) { 878 if (rval->rcv_privilege == RCPRIV_BASIC) { 879 dval->rcv_action_recipient = newp; 880 dval->rcv_action_recip_pid = newp->p_pid; 881 } else { 882 dval->rcv_action_recipient = NULL; 883 dval->rcv_action_recip_pid = -1; 884 } 885 886 (void) rctl_val_list_insert(&head, dval); 887 } else { 888 kmem_cache_free(rctl_val_cache, dval); 889 } 890 } 891 892 return (head); 893 } 894 895 static void 896 rctl_val_list_reset(rctl_val_t *rval) 897 { 898 for (; rval != NULL; rval = rval->rcv_next) 899 rval->rcv_firing_time = 0; 900 } 901 902 static uint_t 903 rctl_val_list_count(rctl_val_t *rval) 904 { 905 uint_t n = 0; 906 907 for (; rval != NULL; rval = rval->rcv_next) 908 n++; 909 910 return (n); 911 } 912 913 914 static void 915 rctl_val_list_free(rctl_val_t *rval) 916 { 917 while (rval != NULL) { 918 rctl_val_t *next = rval->rcv_next; 919 920 kmem_cache_free(rctl_val_cache, rval); 921 922 rval = next; 923 } 924 } 925 926 /* 927 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 928 * 929 * Overview 930 * In cases where the operating system supports more than one process 931 * addressing model, the operating system capabilities will exceed those of 932 * one or more of these models. 
Processes in a less capable model must have 933 * their resources accurately controlled, without diluting those of their 934 * descendants reached via exec(). rctl_model_maximum() returns the governing 935 * value for the specified process with respect to a resource control, such 936 * that the value can used for the RCTLOP_SET callback or compatability 937 * support. 938 * 939 * Return values 940 * The maximum value for the given process for the specified resource control. 941 * 942 * Caller's context 943 * No restrictions on context. 944 */ 945 rctl_qty_t 946 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 947 { 948 if (p->p_model == DATAMODEL_NATIVE) 949 return (rde->rcd_max_native); 950 951 return (rde->rcd_max_ilp32); 952 } 953 954 /* 955 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 956 * 957 * Overview 958 * Convenience function wrapping the rctl_model_maximum() functionality. 959 * 960 * Return values 961 * The lesser of the process's maximum value and the given value for the 962 * specified resource control. 963 * 964 * Caller's context 965 * No restrictions on context. 966 */ 967 rctl_qty_t 968 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 969 { 970 rctl_qty_t max = rctl_model_maximum(rde, p); 971 972 return (value < max ? 
value : max); 973 } 974 975 static void 976 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 977 { 978 uint_t index = hndl % rctl_set_size; 979 rctl_t *next_ctl, *prev_ctl; 980 981 ASSERT(MUTEX_HELD(&set->rcs_lock)); 982 983 rctl->rc_next = NULL; 984 985 if (set->rcs_ctls[index] == NULL) { 986 set->rcs_ctls[index] = rctl; 987 return; 988 } 989 990 if (hndl < set->rcs_ctls[index]->rc_id) { 991 rctl->rc_next = set->rcs_ctls[index]; 992 set->rcs_ctls[index] = rctl; 993 994 return; 995 } 996 997 for (next_ctl = set->rcs_ctls[index]->rc_next, 998 prev_ctl = set->rcs_ctls[index]; 999 next_ctl != NULL; 1000 prev_ctl = next_ctl, 1001 next_ctl = next_ctl->rc_next) { 1002 if (next_ctl->rc_id > hndl) { 1003 rctl->rc_next = next_ctl; 1004 prev_ctl->rc_next = rctl; 1005 1006 return; 1007 } 1008 } 1009 1010 rctl->rc_next = next_ctl; 1011 prev_ctl->rc_next = rctl; 1012 } 1013 1014 /* 1015 * rctl_set_t *rctl_set_create() 1016 * 1017 * Overview 1018 * Create an empty resource control set, suitable for attaching to a 1019 * controlled entity. 1020 * 1021 * Return values 1022 * A pointer to the newly created set. 1023 * 1024 * Caller's context 1025 * Safe for KM_SLEEP allocations. 1026 */ 1027 rctl_set_t * 1028 rctl_set_create() 1029 { 1030 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 1031 1032 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 1033 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1034 KM_SLEEP); 1035 rset->rcs_entity = -1; 1036 1037 return (rset); 1038 } 1039 1040 /* 1041 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1042 * 1043 * Overview 1044 * rctl_set_init_prealloc() examines the globally defined resource controls 1045 * and their default values and returns a resource control allocation group 1046 * populated with sufficient controls and values to form a representative 1047 * resource control set for the specified entity. 
1048 * 1049 * Return values 1050 * A pointer to the newly created allocation group. 1051 * 1052 * Caller's context 1053 * Caller must be in a context suitable for KM_SLEEP allocations. 1054 */ 1055 rctl_alloc_gp_t * 1056 rctl_set_init_prealloc(rctl_entity_t entity) 1057 { 1058 rctl_dict_entry_t *rde; 1059 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1060 1061 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1062 1063 if (rctl_lists[entity] == NULL) 1064 return (ragp); 1065 1066 mutex_enter(&rctl_lists_lock); 1067 1068 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1069 ragp->rcag_nctls++; 1070 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1071 } 1072 1073 mutex_exit(&rctl_lists_lock); 1074 1075 rctl_gp_alloc(ragp); 1076 1077 return (ragp); 1078 } 1079 1080 /* 1081 * rctl_set_t *rctl_set_init(rctl_entity_t) 1082 * 1083 * Overview 1084 * rctl_set_create() creates a resource control set, initialized with the 1085 * system infinite values on all registered controls, for attachment to a 1086 * system entity requiring resource controls, such as a process or a task. 1087 * 1088 * Return values 1089 * A pointer to the newly filled set. 1090 * 1091 * Caller's context 1092 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1093 * may modify task and project members based on the proc structure 1094 * they are passed. 
1095 */ 1096 rctl_set_t * 1097 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1098 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1099 { 1100 rctl_dict_entry_t *rde; 1101 1102 ASSERT(MUTEX_HELD(&p->p_lock)); 1103 ASSERT(e); 1104 rset->rcs_entity = entity; 1105 1106 if (rctl_lists[entity] == NULL) 1107 return (rset); 1108 1109 mutex_enter(&rctl_lists_lock); 1110 mutex_enter(&rset->rcs_lock); 1111 1112 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1113 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1114 1115 rctl->rc_dict_entry = rde; 1116 rctl->rc_id = rde->rcd_id; 1117 rctl->rc_projdb = NULL; 1118 1119 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1120 ragp, NULL, p); 1121 rctl->rc_cursor = rctl->rc_values; 1122 1123 ASSERT(rctl->rc_cursor != NULL); 1124 1125 rctl_set_insert(rset, rde->rcd_id, rctl); 1126 1127 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1128 rctl->rc_cursor->rcv_value)); 1129 } 1130 1131 mutex_exit(&rset->rcs_lock); 1132 mutex_exit(&rctl_lists_lock); 1133 1134 return (rset); 1135 } 1136 1137 static rctl_t * 1138 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1139 struct proc *newp) 1140 { 1141 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1142 rctl_val_t *dval; 1143 1144 dup->rc_id = rctl->rc_id; 1145 dup->rc_dict_entry = rctl->rc_dict_entry; 1146 dup->rc_next = NULL; 1147 dup->rc_cursor = NULL; 1148 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1149 1150 for (dval = dup->rc_values; 1151 dval != NULL; dval = dval->rcv_next) { 1152 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1153 dup->rc_cursor = dval; 1154 break; 1155 } 1156 } 1157 1158 if (dup->rc_cursor == NULL) 1159 dup->rc_cursor = dup->rc_values; 1160 1161 return (dup); 1162 } 1163 1164 static void 1165 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1166 { 1167 uint_t i; 1168 1169 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1170 1171 for (i = 0; i < rctl_set_size; i++) { 
1172 rctl_t *r = set->rcs_ctls[i]; 1173 1174 while (r != NULL) { 1175 ragp->rcag_nctls++; 1176 1177 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1178 1179 r = r->rc_next; 1180 } 1181 } 1182 } 1183 1184 /* 1185 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1186 * 1187 * Overview 1188 * Given a resource control set, allocate a sufficiently large allocation 1189 * group to contain a duplicate of the set. 1190 * 1191 * Return value 1192 * A pointer to the newly created allocation group. 1193 * 1194 * Caller's context 1195 * Safe for KM_SLEEP allocations. 1196 */ 1197 rctl_alloc_gp_t * 1198 rctl_set_dup_prealloc(rctl_set_t *set) 1199 { 1200 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1201 1202 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1203 1204 mutex_enter(&set->rcs_lock); 1205 rctl_set_fill_alloc_gp(set, ragp); 1206 mutex_exit(&set->rcs_lock); 1207 1208 rctl_gp_alloc(ragp); 1209 1210 return (ragp); 1211 } 1212 1213 /* 1214 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1215 * 1216 * Overview 1217 * Verify that the allocation group provided is large enough to allow a 1218 * duplicate of the given resource control set to be constructed from its 1219 * contents. 1220 * 1221 * Return values 1222 * 1 if the allocation group is sufficiently large, 0 otherwise. 1223 * 1224 * Caller's context 1225 * rcs_lock must be held prior to entry. 1226 */ 1227 int 1228 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1229 { 1230 rctl_alloc_gp_t curr_gp; 1231 1232 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1233 1234 rctl_set_fill_alloc_gp(set, &curr_gp); 1235 1236 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1237 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1238 return (1); 1239 1240 return (0); 1241 } 1242 1243 /* 1244 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1245 * rctl_set_t *, rctl_alloc_gp_t *, int) 1246 * 1247 * Overview 1248 * Make a duplicate of the resource control set. 
The proc pointers are those 1249 * of the owning process and of the process associated with the entity 1250 * receiving the duplicate. 1251 * 1252 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1253 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1254 * Stage 2 consists of copying all rctls and values from the old set into 1255 * the new. Stage 3 completes the duplication by performing the appropriate 1256 * callbacks for each rctl in the new set. 1257 * 1258 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1259 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1260 * be supplied if the newp proc structure reflects the new task and 1261 * project linkage. 1262 * 1263 * Return value 1264 * A pointer to the duplicate set. 1265 * 1266 * Caller's context 1267 * The rcs_lock of the set to be duplicated must be held prior to entry. 1268 */ 1269 rctl_set_t * 1270 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1271 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1272 { 1273 uint_t i; 1274 rctl_set_t *iter; 1275 1276 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1277 ASSERT(e); 1278 /* 1279 * When copying the old set, iterate over that. Otherwise, when 1280 * only callbacks have been requested, iterate over the dup set. 
1281 */ 1282 if (flag & RCD_DUP) { 1283 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1284 iter = set; 1285 dup->rcs_entity = set->rcs_entity; 1286 } else { 1287 iter = dup; 1288 } 1289 1290 mutex_enter(&dup->rcs_lock); 1291 1292 for (i = 0; i < rctl_set_size; i++) { 1293 rctl_t *r = iter->rcs_ctls[i]; 1294 rctl_t *d; 1295 1296 while (r != NULL) { 1297 if (flag & RCD_DUP) { 1298 d = rctl_dup(r, ragp, oldp, newp); 1299 rctl_set_insert(dup, r->rc_id, d); 1300 } else { 1301 d = r; 1302 } 1303 1304 if (flag & RCD_CALLBACK) 1305 RCTLOP_SET(d, newp, e, 1306 rctl_model_value(d->rc_dict_entry, newp, 1307 d->rc_cursor->rcv_value)); 1308 1309 r = r->rc_next; 1310 } 1311 } 1312 1313 mutex_exit(&dup->rcs_lock); 1314 1315 return (dup); 1316 } 1317 1318 /* 1319 * void rctl_set_free(rctl_set_t *) 1320 * 1321 * Overview 1322 * Delete resource control set and all attached values. 1323 * 1324 * Return values 1325 * No value returned. 1326 * 1327 * Caller's context 1328 * No restrictions on context. 1329 */ 1330 void 1331 rctl_set_free(rctl_set_t *set) 1332 { 1333 uint_t i; 1334 1335 mutex_enter(&set->rcs_lock); 1336 for (i = 0; i < rctl_set_size; i++) { 1337 rctl_t *r = set->rcs_ctls[i]; 1338 1339 while (r != NULL) { 1340 rctl_val_t *v = r->rc_values; 1341 rctl_t *n = r->rc_next; 1342 1343 kmem_cache_free(rctl_cache, r); 1344 1345 rctl_val_list_free(v); 1346 1347 r = n; 1348 } 1349 } 1350 mutex_exit(&set->rcs_lock); 1351 1352 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1353 kmem_free(set, sizeof (rctl_set_t)); 1354 } 1355 1356 /* 1357 * void rctl_set_reset(rctl_set_t *) 1358 * 1359 * Overview 1360 * Resets all rctls within the set such that the lowest value becomes active. 1361 * 1362 * Return values 1363 * No value returned. 1364 * 1365 * Caller's context 1366 * No restrictions on context. 
1367 */ 1368 void 1369 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1370 { 1371 uint_t i; 1372 1373 ASSERT(e); 1374 1375 mutex_enter(&set->rcs_lock); 1376 for (i = 0; i < rctl_set_size; i++) { 1377 rctl_t *r = set->rcs_ctls[i]; 1378 1379 while (r != NULL) { 1380 r->rc_cursor = r->rc_values; 1381 rctl_val_list_reset(r->rc_cursor); 1382 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1383 p, r->rc_cursor->rcv_value)); 1384 1385 ASSERT(r->rc_cursor != NULL); 1386 1387 r = r->rc_next; 1388 } 1389 } 1390 1391 mutex_exit(&set->rcs_lock); 1392 } 1393 1394 /* 1395 * void rctl_set_tearoff(rctl_set *, struct proc *) 1396 * 1397 * Overview 1398 * Tear off any resource control values on this set with an action recipient 1399 * equal to the specified process (as they are becoming invalid with the 1400 * process's departure from this set as an observer). 1401 * 1402 * Return values 1403 * No value returned. 1404 * 1405 * Caller's context 1406 * No restrictions on context 1407 */ 1408 void 1409 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1410 { 1411 uint_t i; 1412 1413 mutex_enter(&set->rcs_lock); 1414 for (i = 0; i < rctl_set_size; i++) { 1415 rctl_t *r = set->rcs_ctls[i]; 1416 1417 while (r != NULL) { 1418 rctl_val_t *rval; 1419 1420 tearoff_rewalk_list: 1421 rval = r->rc_values; 1422 1423 while (rval != NULL) { 1424 if (rval->rcv_privilege == RCPRIV_BASIC && 1425 rval->rcv_action_recipient == p) { 1426 if (r->rc_cursor == rval) 1427 r->rc_cursor = rval->rcv_next; 1428 1429 (void) rctl_val_list_delete( 1430 &r->rc_values, rval); 1431 1432 goto tearoff_rewalk_list; 1433 } 1434 1435 rval = rval->rcv_next; 1436 } 1437 1438 ASSERT(r->rc_cursor != NULL); 1439 1440 r = r->rc_next; 1441 } 1442 } 1443 1444 mutex_exit(&set->rcs_lock); 1445 } 1446 1447 int 1448 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1449 { 1450 uint_t index = hndl % rctl_set_size; 1451 rctl_t *curr_ctl; 1452 1453 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1454 1455 
for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1456 curr_ctl = curr_ctl->rc_next) { 1457 if (curr_ctl->rc_id == hndl) { 1458 *rctl = curr_ctl; 1459 1460 return (0); 1461 } 1462 } 1463 1464 return (-1); 1465 } 1466 1467 /* 1468 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1469 * 1470 * Overview 1471 * Given a process, get the next enforced value on the rctl of the specified 1472 * handle. 1473 * 1474 * Return value 1475 * The enforced value. 1476 * 1477 * Caller's context 1478 * For controls on process collectives, p->p_lock must be held across the 1479 * operation. 1480 */ 1481 /*ARGSUSED*/ 1482 rctl_qty_t 1483 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1484 { 1485 rctl_t *rctl; 1486 rlim64_t ret; 1487 1488 mutex_enter(&rset->rcs_lock); 1489 1490 if (rctl_set_find(rset, hndl, &rctl) == -1) 1491 panic("unknown resource control handle %d requested", hndl); 1492 else 1493 ret = rctl_model_value(rctl->rc_dict_entry, p, 1494 rctl->rc_cursor->rcv_value); 1495 1496 mutex_exit(&rset->rcs_lock); 1497 1498 return (ret); 1499 } 1500 1501 /* 1502 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1503 * 1504 * Overview 1505 * Copy a sanitized version of the global rctl for a given resource control 1506 * name. (By sanitization, we mean that the unsafe data pointers have been 1507 * zeroed.) 1508 * 1509 * Return value 1510 * -1 if name not defined, 0 otherwise. 1511 * 1512 * Caller's context 1513 * No restrictions on context. rctl_dict_lock must not be held. 
1514 */ 1515 int 1516 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1517 { 1518 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1519 1520 if (rde == NULL) 1521 return (-1); 1522 1523 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1524 1525 drde->rcd_next = NULL; 1526 drde->rcd_ops = NULL; 1527 1528 return (0); 1529 } 1530 1531 /* 1532 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1533 * 1534 * Overview 1535 * Transfer the settable fields of the named rctl to the global rctl matching 1536 * the given resource control name. 1537 * 1538 * Return value 1539 * -1 if name not defined, 0 otherwise. 1540 * 1541 * Caller's context 1542 * No restrictions on context. rctl_dict_lock must not be held. 1543 */ 1544 int 1545 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1546 { 1547 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1548 1549 if (rde == NULL) 1550 return (-1); 1551 1552 rde->rcd_flagaction = drde->rcd_flagaction; 1553 rde->rcd_syslog_level = drde->rcd_syslog_level; 1554 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1555 1556 return (0); 1557 } 1558 1559 static int 1560 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1561 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1562 rctl_val_t *, rctl_val_t *), struct proc *p) 1563 { 1564 rctl_t *rctl; 1565 rctl_set_t *rset; 1566 rctl_entity_p_t e; 1567 int ret = 0; 1568 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1569 1570 local_op_retry: 1571 1572 ASSERT(MUTEX_HELD(&p->p_lock)); 1573 1574 rset = rctl_entity_obtain_rset(rde, p); 1575 1576 if (rset == NULL) { 1577 return (-1); 1578 } 1579 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1580 1581 mutex_enter(&rset->rcs_lock); 1582 1583 /* using rctl's hndl, get rctl from local set */ 1584 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1585 mutex_exit(&rset->rcs_lock); 1586 return (-1); 1587 } 1588 1589 ret = cbop(hndl, p, &e, rctl, oval, nval); 1590 1591 
mutex_exit(&rset->rcs_lock); 1592 return (ret); 1593 } 1594 1595 /*ARGSUSED*/ 1596 static int 1597 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1598 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1599 { 1600 if (oval == NULL) { 1601 /* 1602 * RCTL_FIRST 1603 */ 1604 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1605 } else { 1606 /* 1607 * RCTL_NEXT 1608 */ 1609 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1610 1611 if (tval == NULL) 1612 return (ESRCH); 1613 else if (tval->rcv_next == NULL) 1614 return (ENOENT); 1615 else 1616 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1617 } 1618 1619 return (0); 1620 } 1621 1622 /* 1623 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1624 * 1625 * Overview 1626 * Get the rctl value for the given flags. 1627 * 1628 * Return values 1629 * 0 for successful get, errno otherwise. 1630 */ 1631 int 1632 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1633 struct proc *p) 1634 { 1635 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1636 } 1637 1638 /*ARGSUSED*/ 1639 static int 1640 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1641 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1642 { 1643 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1644 return (ESRCH); 1645 1646 if (rctl->rc_cursor == oval) { 1647 rctl->rc_cursor = oval->rcv_next; 1648 rctl_val_list_reset(rctl->rc_cursor); 1649 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1650 rctl->rc_cursor->rcv_value)); 1651 1652 ASSERT(rctl->rc_cursor != NULL); 1653 } 1654 1655 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1656 1657 return (0); 1658 } 1659 1660 /* 1661 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1662 * 1663 * Overview 1664 * Delete the rctl value for the given flags. 1665 * 1666 * Return values 1667 * 0 for successful delete, errno otherwise. 
1668 */ 1669 int 1670 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1671 { 1672 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1673 } 1674 1675 /* 1676 * rctl_local_insert_cb() 1677 * 1678 * Overview 1679 * Insert a new value into the rctl's val list. If an error occurs, 1680 * the val list must be left in the same state as when the function 1681 * was entered. 1682 * 1683 * Return Values 1684 * 0 for successful insert, EINVAL if the value is duplicated in the 1685 * existing list. 1686 */ 1687 /*ARGSUSED*/ 1688 static int 1689 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1690 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1691 { 1692 /* 1693 * Before inserting, confirm there are no duplicates of this value 1694 * and flag level. If there is a duplicate, flag an error and do 1695 * nothing. 1696 */ 1697 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1698 return (EINVAL); 1699 1700 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1701 rctl->rc_cursor = nval; 1702 rctl_val_list_reset(rctl->rc_cursor); 1703 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1704 rctl->rc_cursor->rcv_value)); 1705 1706 ASSERT(rctl->rc_cursor != NULL); 1707 } 1708 1709 return (0); 1710 } 1711 1712 /* 1713 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1714 * 1715 * Overview 1716 * Insert the rctl value into the appropriate rctl set for the calling 1717 * process, given the handle. 1718 */ 1719 int 1720 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1721 { 1722 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1723 } 1724 1725 /* 1726 * rctl_local_insert_all_cb() 1727 * 1728 * Overview 1729 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1730 * 1731 * Inserts new values from the project database (new_values). alloc_values 1732 * should be a linked list of pre-allocated rctl_val_t, which are used to 1733 * populate (rc_projdb). 
1734 * 1735 * Should the *new_values linked list match the contents of the rctl's 1736 * rp_projdb then we do nothing. 1737 * 1738 * Return Values 1739 * 0 is always returned. 1740 */ 1741 /*ARGSUSED*/ 1742 static int 1743 rctl_local_insert_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1744 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1745 { 1746 rctl_val_t *val; 1747 rctl_val_t *tmp_val; 1748 rctl_val_t *next; 1749 int modified = 0; 1750 1751 /* 1752 * If this the first time we've set this project rctl, then we delete 1753 * all the privilege values. These privilege values have been set by 1754 * rctl_add_default_limit(). 1755 * 1756 * We save some cycles here by not calling rctl_val_list_delete(). 1757 */ 1758 if (rctl->rc_projdb == NULL) { 1759 val = rctl->rc_values; 1760 1761 while (val != NULL) { 1762 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1763 if (val->rcv_prev != NULL) 1764 val->rcv_prev->rcv_next = val->rcv_next; 1765 else 1766 rctl->rc_values = val->rcv_next; 1767 1768 if (val->rcv_next != NULL) 1769 val->rcv_next->rcv_prev = val->rcv_prev; 1770 1771 tmp_val = val; 1772 val = val->rcv_next; 1773 kmem_cache_free(rctl_val_cache, tmp_val); 1774 } else { 1775 val = val->rcv_next; 1776 } 1777 } 1778 modified = 1; 1779 } 1780 1781 /* 1782 * Delete active values previously set through the project database. 1783 */ 1784 val = rctl->rc_projdb; 1785 1786 while (val != NULL) { 1787 1788 /* Is the old value found in the new values? */ 1789 if (rctl_val_list_find(&new_values, val) == NULL) { 1790 1791 /* 1792 * Delete from the active values if it originated from 1793 * the project database. 
1794 */ 1795 if (((tmp_val = rctl_val_list_find(&rctl->rc_values, 1796 val)) != NULL) && 1797 (tmp_val->rcv_flagaction & RCTL_LOCAL_PROJDB)) { 1798 (void) rctl_val_list_delete(&rctl->rc_values, 1799 tmp_val); 1800 } 1801 1802 tmp_val = val->rcv_next; 1803 (void) rctl_val_list_delete(&rctl->rc_projdb, val); 1804 val = tmp_val; 1805 modified = 1; 1806 1807 } else 1808 val = val->rcv_next; 1809 } 1810 1811 /* 1812 * Insert new values from the project database. 1813 */ 1814 while (new_values != NULL) { 1815 next = new_values->rcv_next; 1816 1817 /* 1818 * Insert this new value into the rc_projdb, and duplicate this 1819 * entry to the active list. 1820 */ 1821 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1822 1823 tmp_val = alloc_values->rcv_next; 1824 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1825 alloc_values->rcv_next = tmp_val; 1826 1827 if (rctl_val_list_insert(&rctl->rc_values, 1828 alloc_values) == 0) { 1829 /* inserted move alloc_values on */ 1830 alloc_values = tmp_val; 1831 modified = 1; 1832 } 1833 } else { 1834 /* 1835 * Unlike setrctl() we don't want to return an error on 1836 * a duplicate entry; we are concerned solely with 1837 * ensuring that all the values specified are set. 
1838 */ 1839 kmem_cache_free(rctl_val_cache, new_values); 1840 } 1841 new_values = next; 1842 } 1843 1844 /* Teardown any unused rctl_val_t */ 1845 while (alloc_values != NULL) { 1846 tmp_val = alloc_values; 1847 alloc_values = alloc_values->rcv_next; 1848 kmem_cache_free(rctl_val_cache, tmp_val); 1849 } 1850 1851 /* Reset the cursor if rctl values have been modified */ 1852 if (modified) { 1853 rctl->rc_cursor = rctl->rc_values; 1854 rctl_val_list_reset(rctl->rc_cursor); 1855 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1856 rctl->rc_cursor->rcv_value)); 1857 } 1858 1859 return (0); 1860 } 1861 1862 int 1863 rctl_local_insert_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1864 rctl_val_t *alloc_values, struct proc *p) 1865 { 1866 return (rctl_local_op(hndl, new_values, alloc_values, 1867 rctl_local_insert_all_cb, p)); 1868 } 1869 1870 /* 1871 * rctl_local_replace_all_cb() 1872 * 1873 * Overview 1874 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1875 * 1876 * Clears the active rctl values (rc_values), and stored values from the 1877 * previous insertions from the project database (rc_projdb). 1878 * 1879 * Inserts new values from the project database (new_values). alloc_values 1880 * should be a linked list of pre-allocated rctl_val_t, which are used to 1881 * populate (rc_projdb). 1882 * 1883 * Return Values 1884 * 0 is always returned. 
1885 */ 1886 /*ARGSUSED*/ 1887 static int 1888 rctl_local_replace_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1889 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1890 { 1891 rctl_val_t *val; 1892 rctl_val_t *next; 1893 rctl_val_t *tmp_val; 1894 1895 /* Delete all the privilege vaules */ 1896 val = rctl->rc_values; 1897 1898 while (val != NULL) { 1899 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1900 if (val->rcv_prev != NULL) 1901 val->rcv_prev->rcv_next = val->rcv_next; 1902 else 1903 rctl->rc_values = val->rcv_next; 1904 1905 if (val->rcv_next != NULL) 1906 val->rcv_next->rcv_prev = val->rcv_prev; 1907 1908 tmp_val = val; 1909 val = val->rcv_next; 1910 kmem_cache_free(rctl_val_cache, tmp_val); 1911 } else { 1912 val = val->rcv_next; 1913 } 1914 } 1915 1916 /* Delete the contents of rc_projdb */ 1917 val = rctl->rc_projdb; 1918 while (val != NULL) { 1919 1920 tmp_val = val; 1921 val = val->rcv_next; 1922 kmem_cache_free(rctl_val_cache, tmp_val); 1923 } 1924 rctl->rc_projdb = NULL; 1925 1926 /* 1927 * Insert new values from the project database. 1928 */ 1929 while (new_values != NULL) { 1930 next = new_values->rcv_next; 1931 1932 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1933 tmp_val = alloc_values->rcv_next; 1934 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1935 alloc_values->rcv_next = tmp_val; 1936 1937 if (rctl_val_list_insert(&rctl->rc_values, 1938 alloc_values) == 0) { 1939 /* inserted, so move alloc_values on */ 1940 alloc_values = tmp_val; 1941 } 1942 } else { 1943 /* 1944 * Unlike setrctl() we don't want to return an error on 1945 * a duplicate entry; we are concerned solely with 1946 * ensuring that all the values specified are set. 
1947 */ 1948 kmem_cache_free(rctl_val_cache, new_values); 1949 } 1950 1951 new_values = next; 1952 } 1953 1954 /* Teardown any unused rctl_val_t */ 1955 while (alloc_values != NULL) { 1956 tmp_val = alloc_values; 1957 alloc_values = alloc_values->rcv_next; 1958 kmem_cache_free(rctl_val_cache, tmp_val); 1959 } 1960 1961 /* Always reset the cursor */ 1962 rctl->rc_cursor = rctl->rc_values; 1963 rctl_val_list_reset(rctl->rc_cursor); 1964 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1965 rctl->rc_cursor->rcv_value)); 1966 1967 return (0); 1968 } 1969 1970 int 1971 rctl_local_replace_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1972 rctl_val_t *alloc_values, struct proc *p) 1973 { 1974 return (rctl_local_op(hndl, new_values, alloc_values, 1975 rctl_local_replace_all_cb, p)); 1976 } 1977 1978 static int 1979 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1980 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1981 { 1982 int ret; 1983 rctl_val_t *tmp; 1984 1985 /* Verify that old will be delete-able */ 1986 tmp = rctl_val_list_find(&rctl->rc_values, oval); 1987 if (tmp == NULL) 1988 return (ESRCH); 1989 /* 1990 * Caller should verify that value being deleted is not the 1991 * system value. 1992 */ 1993 ASSERT(tmp->rcv_privilege != RCPRIV_SYSTEM); 1994 1995 /* 1996 * rctl_local_insert_cb() does the job of flagging an error 1997 * for any duplicate values. So, call rctl_local_insert_cb() 1998 * for the new value first, then do deletion of the old value. 1999 * Since this is a callback function to rctl_local_op, we can 2000 * count on rcs_lock being held at this point. This guarantees 2001 * that there is at no point a visible list which contains both 2002 * new and old values. 
2003 */ 2004 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 2005 return (ret); 2006 2007 ret = rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval); 2008 ASSERT(ret == 0); 2009 return (0); 2010 } 2011 2012 /* 2013 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 2014 * 2015 * Overview 2016 * Replace the rctl value with a new one. 2017 * 2018 * Return values 2019 * 0 for successful replace, errno otherwise. 2020 */ 2021 int 2022 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 2023 struct proc *p) 2024 { 2025 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 2026 } 2027 2028 /* 2029 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 2030 * 2031 * Overview 2032 * To support rlimit compatibility, we need a function which takes a 64-bit 2033 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2034 * This operation is only intended for legacy rlimits. 2035 */ 2036 int 2037 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 2038 { 2039 rctl_t *rctl; 2040 rctl_val_t *rval; 2041 rctl_set_t *rset = p->p_rctls; 2042 int soft_limit_seen = 0; 2043 int test_for_deny = 1; 2044 2045 mutex_enter(&rset->rcs_lock); 2046 if (rctl_set_find(rset, rc, &rctl) == -1) { 2047 mutex_exit(&rset->rcs_lock); 2048 return (-1); 2049 } 2050 2051 rval = rctl->rc_values; 2052 2053 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 2054 RCTL_GLOBAL_DENY_ALWAYS)) 2055 test_for_deny = 0; 2056 2057 /* 2058 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 2059 */ 2060 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 2061 if (test_for_deny && 2062 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 2063 rval = rval->rcv_next; 2064 continue; 2065 } 2066 2067 /* 2068 * 2. If this is an RCPRIV_BASIC value, then we've found the 2069 * effective soft limit and should set rlim_cur. 
We should then 2070 * continue looking for another control value with the DENY bit 2071 * set. 2072 */ 2073 if (rval->rcv_privilege == RCPRIV_BASIC) { 2074 if (soft_limit_seen) { 2075 rval = rval->rcv_next; 2076 continue; 2077 } 2078 2079 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2080 rval->rcv_value < rctl_model_maximum( 2081 rctl->rc_dict_entry, p)) 2082 rlp64->rlim_cur = rval->rcv_value; 2083 else 2084 rlp64->rlim_cur = RLIM64_INFINITY; 2085 soft_limit_seen = 1; 2086 2087 rval = rval->rcv_next; 2088 continue; 2089 } 2090 2091 /* 2092 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 2093 * a soft limit candidate, then we've found the effective hard 2094 * and soft limits and should set both If we had found a soft 2095 * limit, then this is only the hard limit and we need only set 2096 * rlim_max. 2097 */ 2098 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2099 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 2100 p)) 2101 rlp64->rlim_max = rval->rcv_value; 2102 else 2103 rlp64->rlim_max = RLIM64_INFINITY; 2104 if (!soft_limit_seen) 2105 rlp64->rlim_cur = rlp64->rlim_max; 2106 2107 mutex_exit(&rset->rcs_lock); 2108 return (0); 2109 } 2110 2111 if (rval == NULL) { 2112 /* 2113 * This control sequence is corrupt, as it is not terminated by 2114 * a system privileged control value. 2115 */ 2116 mutex_exit(&rset->rcs_lock); 2117 return (-1); 2118 } 2119 2120 /* 2121 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 2122 * the soft, if we haven't a soft candidate) should be the value of the 2123 * system control value. 
2124 */ 2125 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2126 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 2127 rlp64->rlim_max = rval->rcv_value; 2128 else 2129 rlp64->rlim_max = RLIM64_INFINITY; 2130 2131 if (!soft_limit_seen) 2132 rlp64->rlim_cur = rlp64->rlim_max; 2133 2134 mutex_exit(&rset->rcs_lock); 2135 return (0); 2136 } 2137 2138 /* 2139 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 2140 * 2141 * Overview 2142 * Before making a series of calls to rctl_rlimit_set(), we must have a 2143 * preallocated batch of resource control values, as rctl_rlimit_set() can 2144 * potentially consume two resource control values per call. 2145 * 2146 * Return values 2147 * A populated resource control allocation group with 2n resource control 2148 * values. 2149 * 2150 * Caller's context 2151 * Must be safe for KM_SLEEP allocations. 2152 */ 2153 rctl_alloc_gp_t * 2154 rctl_rlimit_set_prealloc(uint_t n) 2155 { 2156 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 2157 2158 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 2159 2160 gp->rcag_nvals = 2 * n; 2161 2162 rctl_gp_alloc(gp); 2163 2164 return (gp); 2165 } 2166 2167 /* 2168 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 2169 * int) 2170 * 2171 * Overview 2172 * To support rlimit compatibility, we need a function which takes a 64-bit 2173 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2174 * This operation is only intended for legacy rlimits. 2175 * 2176 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 2177 * minimize the number of values placed on the value sequence in various 2178 * cases. Furthermore, we don't allow multiple identical privilege-action 2179 * values on the same sequence. (That is, we don't want a sequence like 2180 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 2181 * memory.) 
So we want to delete any values with the same privilege value and
 *	action.
 *
 * Return values
 *	0 for successful set, errno otherwise.  Errno will be either EINVAL
 *	or EPERM, in keeping with defined errnos for ulimit() and setrlimit()
 *	system calls.
 */
/*ARGSUSED*/
int
rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64,
    rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr)
{
	rctl_set_t *rset = p->p_rctls;
	rctl_entity_p_t e;
	struct rlimit64 old_rl;
	rctl_t *rctl;
	rctl_val_t *scan, *following;
	rctl_val_t *hard, *soft;
	rctl_qty_t hard_qty;

	e.rcep_t = RCENTITY_PROCESS;
	e.rcep_p.proc = p;

	/* the soft limit may never exceed the hard limit */
	if (rlp64->rlim_cur > rlp64->rlim_max)
		return (EINVAL);

	if (rctl_rlimit_get(rc, p, &old_rl) == -1)
		return (EINVAL);

	/*
	 * If we are not privileged, we can only lower the hard limit.
	 */
	if (rlp64->rlim_max > old_rl.rlim_max &&
	    old_rl.rlim_max != RLIM64_INFINITY &&
	    secpolicy_resource(cr) != 0)
		return (EPERM);

	mutex_enter(&rset->rcs_lock);

	if (rctl_set_find(rset, rc, &rctl) == -1) {
		mutex_exit(&rset->rcs_lock);
		return (EINVAL);
	}

	hard = rctl_gp_detach_val(ragp);

	/*
	 * Walk the values ahead of the system value, dropping every basic
	 * value and every value whose flags outside the local action mask
	 * match those of the request.
	 */
	for (scan = rctl->rc_values; scan != NULL; scan = following) {
		following = scan->rcv_next;

		if (scan->rcv_privilege == RCPRIV_SYSTEM)
			break;

		if (scan->rcv_privilege != RCPRIV_BASIC &&
		    (scan->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) !=
		    (flagaction & ~RCTL_LOCAL_ACTION_MASK))
			continue;

		/* step the cursor past a value that is about to go away */
		if (rctl->rc_cursor == scan) {
			rctl->rc_cursor = following;
			rctl_val_list_reset(rctl->rc_cursor);
			RCTLOP_SET(rctl, p, &e, rctl_model_value(
			    rctl->rc_dict_entry, p,
			    rctl->rc_cursor->rcv_value));
		}

		(void) rctl_val_list_delete(&rctl->rc_values, scan);
	}

	/* Build the privileged value carrying the hard limit. */
	hard->rcv_privilege = RCPRIV_PRIVILEGED;
	hard->rcv_flagaction = flagaction;
	if (rlp64->rlim_max != RLIM64_INFINITY) {
		hard_qty = rlp64->rlim_max;
	} else {
		hard->rcv_flagaction |= RCTL_LOCAL_MAXIMAL;
		hard_qty = rctl->rc_dict_entry->rcd_max_native;
	}
	hard->rcv_value = hard_qty;
	hard->rcv_action_signal = signal;
	hard->rcv_action_recipient = NULL;
	hard->rcv_action_recip_pid = -1;
	hard->rcv_firing_time = 0;
	hard->rcv_prev = hard->rcv_next = NULL;

	(void) rctl_val_list_insert(&rctl->rc_values, hard);
	rctl->rc_cursor = hard;
	rctl_val_list_reset(rctl->rc_cursor);
	RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p,
	    rctl->rc_cursor->rcv_value));

	/*
	 * A finite soft limit below the hard quantity gets its own basic
	 * value, with this process as the action recipient.
	 */
	if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < hard_qty) {
		soft = rctl_gp_detach_val(ragp);

		soft->rcv_privilege = RCPRIV_BASIC;
		soft->rcv_value = rlp64->rlim_cur;
		soft->rcv_flagaction = flagaction;
		soft->rcv_action_signal = signal;
		soft->rcv_action_recipient = p;
		soft->rcv_action_recip_pid = p->p_pid;
		soft->rcv_firing_time = 0;
		soft->rcv_prev = soft->rcv_next = NULL;

		(void) rctl_val_list_insert(&rctl->rc_values, soft);
		rctl->rc_cursor = soft;
		rctl_val_list_reset(rctl->rc_cursor);
		RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p,
		    rctl->rc_cursor->rcv_value));
	}

	ASSERT(rctl->rc_cursor != NULL);

	mutex_exit(&rset->rcs_lock);
	return (0);
}


/*
 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t,
 *   rlim64_t, rctl_ops_t *)
 *
 * Overview
 *	rctl_register() performs a look-up in the dictionary of rctls
 *	active on the system; if a rctl of that name is absent, an entry is
 *	made into the dictionary.
The rctl is returned with its reference
 *   count incremented by one.  If the rctl name already exists, we panic.
 *   (Were the resource control system to support dynamic loading and unloading,
 *   which it is structured for, duplicate registration should lead to load
 *   failure instead of panicking.)
 *
 *   Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be
 *   defined.  This limit contains the highest possible value for this quantity
 *   on the system.  Furthermore, the registered control must provide infinite
 *   values for all applicable address space models supported by the operating
 *   system.  Attempts to set resource control values beyond the system limit
 *   will fail.
 *
 * Return values
 *   The rctl's ID.
 *
 * Caller's context
 *   Caller must be in a context suitable for KM_SLEEP allocations.
 */
rctl_hndl_t
rctl_register(
    const char *name,
    rctl_entity_t entity,
    int global_flags,
    rlim64_t max_native,
    rlim64_t max_ilp32,
    rctl_ops_t *ops)
{
	/*
	 * All three allocations sleep and are performed before any lock is
	 * taken below.
	 */
	rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP);
	rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
	rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t),
	    KM_SLEEP);
	rctl_t *old_rctl;
	rctl_hndl_t rhndl;
	int localflags;

	ASSERT(ops != NULL);

	/*
	 * Clear the two cache-allocated objects; the dictionary entry came
	 * from kmem_zalloc() and needs no explicit clearing.
	 */
	bzero(rctl, sizeof (rctl_t));
	bzero(rctl_val, sizeof (rctl_val_t));

	/*
	 * The default (system) value is always flagged maximal; it also
	 * denies unless the control was registered RCTL_GLOBAL_DENY_NEVER.
	 */
	if (global_flags & RCTL_GLOBAL_DENY_NEVER)
		localflags = RCTL_LOCAL_MAXIMAL;
	else
		localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY;

	/* Build the single RCPRIV_SYSTEM value at the native maximum. */
	rctl_val->rcv_privilege = RCPRIV_SYSTEM;
	rctl_val->rcv_value = max_native;
	rctl_val->rcv_flagaction = localflags;
	rctl_val->rcv_action_signal = 0;
	rctl_val->rcv_action_recipient = NULL;
	rctl_val->rcv_action_recip_pid = -1;
	rctl_val->rcv_firing_time = 0;
	rctl_val->rcv_next = NULL;
	rctl_val->rcv_prev = NULL;

	/*
	 * Fill in the dictionary entry.  The name pointer is stored as-is
	 * (const cast away), not copied, and is also used as the hash key
	 * below.
	 */
	rctl_de->rcd_name = (char *)name;
	rctl_de->rcd_default_value = rctl_val;
	rctl_de->rcd_max_native = max_native;
	rctl_de->rcd_max_ilp32 = max_ilp32;
	rctl_de->rcd_entity = entity;
	rctl_de->rcd_ops = ops;
	rctl_de->rcd_flagaction = global_flags;

	rctl->rc_dict_entry = rctl_de;
	rctl->rc_values = rctl_val;

	/*
	 * 1.  Take global lock, validate nonexistence of name, get ID.
	 *
	 * NOTE(review): &rhndl is passed as a mod_hash_val_t *; if
	 * rctl_hndl_t is narrower than mod_hash_val_t a successful lookup
	 * would store past rhndl.  That path panics immediately afterwards,
	 * but a correctly typed temporary would be cleaner -- confirm types.
	 */
	mutex_enter(&rctl_dict_lock);

	if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name,
	    (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND)
		panic("duplicate registration of rctl %s", name);

	/* The same freshly allocated ID is recorded in all three places. */
	rhndl = rctl_de->rcd_id = rctl->rc_id =
	    (rctl_hndl_t)id_alloc(rctl_ids);

	/*
	 * 2.  Insert name-entry pair in rctl_dict_by_name.
	 */
	if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name,
	    (mod_hash_val_t)rctl_de))
		panic("unable to insert rctl dict entry for %s (%u)", name,
		    (uint_t)rctl->rc_id);

	/*
	 * 3.  Insert ID-rctl_t * pair in rctl_dict.
	 */
	if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id,
	    (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND)
		panic("duplicate rctl ID %u registered", rctl->rc_id);

	if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id,
	    (mod_hash_val_t)rctl))
		panic("unable to insert rctl %s/%u (%p)", name,
		    (uint_t)rctl->rc_id, (void *)rctl);

	/*
	 * 3a. Insert rctl_dict_entry_t * in appropriate entity list.
	 *     (rctl_lists_lock is taken while rctl_dict_lock is still held.)
	 */

	mutex_enter(&rctl_lists_lock);

	switch (entity) {
	case RCENTITY_ZONE:
	case RCENTITY_PROJECT:
	case RCENTITY_TASK:
	case RCENTITY_PROCESS:
		/* Push onto the head of the per-entity list. */
		rctl_de->rcd_next = rctl_lists[entity];
		rctl_lists[entity] = rctl_de;
		break;
	default:
		panic("registering unknown rctl entity %d (%s)", entity,
		    name);
		break;
	}

	mutex_exit(&rctl_lists_lock);

	/*
	 * 4.  Drop lock.
	 */
	mutex_exit(&rctl_dict_lock);

	return (rhndl);
}

/*
 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p,
 *    rctl_val_t *v)
 *
 * Overview
 *   rctl_global_action() takes, in accordance with the flags on the rctl_dict
 *   entry for the given control, the appropriate actions on the exceeded
 *   control value.  Additionally, rctl_global_action() updates the firing time
 *   on the exceeded value.
 *
 *   The actions implemented here are an optional strlog() message (when
 *   RCTL_GLOBAL_SYSLOG is set) and an unconditional denial (when
 *   RCTL_GLOBAL_DENY_ALWAYS is set).  The rset argument is not used.
 *
 * Return values
 *   A bitmask reflecting the actions actually taken.  (Only RCT_DENY is ever
 *   set by this function.)
 *
 * Caller's context
 *   No restrictions on context.
 */
/*ARGSUSED*/
static int
rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v)
{
	rctl_dict_entry_t *rde = r->rc_dict_entry;
	const char *pr, *en, *idstr;
	id_t id;
	enum {
		SUFFIX_NONE,	/* id consumed directly */
		SUFFIX_NUMERIC,	/* id consumed in suffix */
		SUFFIX_STRING	/* idstr consumed in suffix */
	} suffix = SUFFIX_NONE;
	int ret = 0;

	/* Stamp the value with the time it fired. */
	v->rcv_firing_time = gethrtime();

	/* Privilege level name used in the log message. */
	switch (v->rcv_privilege) {
	case RCPRIV_BASIC:
		pr = "basic";
		break;
	case RCPRIV_PRIVILEGED:
		pr = "privileged";
		break;
	case RCPRIV_SYSTEM:
		pr = "system";
		break;
	default:
		pr = "unknown";
		break;
	}

	/*
	 * Entity name and identifier used in the log message.  Exactly one of
	 * id / idstr is assigned per case; suffix records which one, and the
	 * switch below only reads the one that was set.
	 */
	switch (rde->rcd_entity) {
	case RCENTITY_PROCESS:
		en = "process";
		id = p->p_pid;
		suffix = SUFFIX_NONE;
		break;
	case RCENTITY_TASK:
		en = "task";
		id = p->p_task->tk_tkid;
		suffix = SUFFIX_NUMERIC;
		break;
	case RCENTITY_PROJECT:
		en = "project";
		id = p->p_task->tk_proj->kpj_id;
		suffix = SUFFIX_NUMERIC;
		break;
	case RCENTITY_ZONE:
		en = "zone";
		idstr = p->p_zone->zone_name;
		suffix = SUFFIX_STRING;
		break;
	default:
		en = "unknown entity associated with process";
		id = p->p_pid;
		suffix = SUFFIX_NONE;
		break;
	}

	if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) {
		switch (suffix) {
		default:
		case SUFFIX_NONE:
			(void) strlog(0, 0, 0,
			    rde->rcd_strlog_flags | log_global.lz_active,
			    "%s rctl %s (value %llu) exceeded by %s %d.",
			    pr, rde->rcd_name, v->rcv_value, en, id);
			break;
		case SUFFIX_NUMERIC:
			(void) strlog(0, 0, 0,
			    rde->rcd_strlog_flags | log_global.lz_active,
			    "%s rctl %s (value %llu) exceeded by process %d"
			    " in %s %d.",
			    pr, rde->rcd_name, v->rcv_value, p->p_pid,
			    en, id);
			break;
		case SUFFIX_STRING:
			(void) strlog(0, 0, 0,
			    rde->rcd_strlog_flags | log_global.lz_active,
			    "%s rctl %s (value %llu) exceeded by process %d"
			    " in %s %s.",
			    pr, rde->rcd_name, v->rcv_value, p->p_pid,
			    en, idstr);
			break;
		}
	}

	if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS)
		ret |= RCT_DENY;

	return (ret);
}

/*
 * static int rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p,
 *    rctl_val_t *v, uint_t safety)
 *
 * Overview
 *   Take the local actions recorded on the value v: signal delivery
 *   (RCTL_LOCAL_SIGNAL) and denial (RCTL_LOCAL_DENY).  The safety argument
 *   restricts what may be done:
 *     RCA_UNSAFE_ALL  only the deny flag is evaluated; no signal is sent.
 *     RCA_SAFE        the locks may be dropped to allocate a sigqueue_t so
 *                     that the signal carries SI_RCTL siginfo.
 *     otherwise       signals are sent without siginfo (sigtoproc()).
 *
 * Return values
 *   A bitmask of RCT_DENY, RCT_SIGNAL and RCT_LK_ABANDONED.  When
 *   RCT_LK_ABANDONED is set, p->p_lock has been reacquired but
 *   rset->rcs_lock has NOT; the caller must reacquire and revalidate.
 *
 * Caller's context
 *   The callers in this file enter with rset->rcs_lock held;
 *   rctl_test_entity() additionally asserts p->p_lock.  Both locks are
 *   released and retaken here in some paths.
 */
static int
rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v,
    uint_t safety)
{
	int ret = 0;
	sigqueue_t *sqp = NULL;
	rctl_dict_entry_t *rde = r->rc_dict_entry;
	int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE);

	/*
	 * Snapshot everything we need from v up front; v is not dereferenced
	 * again after the locks may have been dropped below.
	 */
	proc_t *recipient = v->rcv_action_recipient;
	id_t recip_pid = v->rcv_action_recip_pid;
	int recip_signal = v->rcv_action_signal;
	uint_t flagaction = v->rcv_flagaction;

	if (safety == RCA_UNSAFE_ALL) {
		/* No signalling at all in this mode; report denial only. */
		if (flagaction & RCTL_LOCAL_DENY) {
			ret |= RCT_DENY;
		}
		return (ret);
	}

	if (flagaction & RCTL_LOCAL_SIGNAL) {
		/*
		 * We can build a siginfo only in the case that it is
		 * safe for us to drop p_lock.  (For asynchronous
		 * checks this is currently not true.)
		 */
		if (safety == RCA_SAFE) {
			/*
			 * Release in the reverse of acquisition order, sleep
			 * for the allocation, then retake p_lock before
			 * rcs_lock.
			 */
			mutex_exit(&rset->rcs_lock);
			mutex_exit(&p->p_lock);
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
			mutex_enter(&p->p_lock);
			mutex_enter(&rset->rcs_lock);

			sqp->sq_info.si_signo = recip_signal;
			sqp->sq_info.si_code = SI_RCTL;
			sqp->sq_info.si_errno = 0;
			sqp->sq_info.si_entity = (int)rde->rcd_entity;
		}

		if (recipient == NULL || recipient == p) {
			/* The violating process itself is the recipient. */
			ret |= RCT_SIGNAL;

			if (sqp == NULL) {
				sigtoproc(p, NULL, recip_signal);
			} else if (p == curproc) {
				/*
				 * Then this is a synchronous test and we can
				 * direct the signal at the violating thread.
				 */
				sigaddqa(curproc, curthread, sqp);
			} else {
				sigaddqa(p, NULL, sqp);
			}
		} else if (!unobservable) {
			/*
			 * The recipient is some other process.  Both locks
			 * must be dropped before taking pidlock to look it up.
			 */
			proc_t *rp;

			mutex_exit(&rset->rcs_lock);
			mutex_exit(&p->p_lock);

			mutex_enter(&pidlock);
			if ((rp = prfind(recip_pid)) == recipient) {
				/*
				 * Recipient process is still alive, but may not
				 * be in this task or project any longer.  In
				 * this case, the recipient's resource control
				 * set pertinent to this control will have
				 * changed--and we will not deliver the signal,
				 * as the recipient process is trying to tear
				 * itself off of its former set.
				 */
				mutex_enter(&rp->p_lock);
				mutex_exit(&pidlock);

				if (rctl_entity_obtain_rset(rde, rp) == rset) {
					ret |= RCT_SIGNAL;

					if (sqp == NULL)
						sigtoproc(rp, NULL,
						    recip_signal);
					else
						sigaddqa(rp, NULL, sqp);
				} else if (sqp) {
					/* Not delivered: release the siginfo. */
					kmem_free(sqp, sizeof (sigqueue_t));
				}
				mutex_exit(&rp->p_lock);
			} else {
				/* Recipient is gone (or the pid was reused). */
				mutex_exit(&pidlock);
				if (sqp)
					kmem_free(sqp, sizeof (sigqueue_t));
			}

			mutex_enter(&p->p_lock);
			/*
			 * Since we dropped p_lock, we may no longer be in the
			 * same task or project as we were at entry.  It is thus
			 * unsafe for us to reacquire the set lock at this
			 * point; callers of rctl_local_action() must handle
			 * this possibility.
			 */
			ret |= RCT_LK_ABANDONED;
		} else if (sqp) {
			/*
			 * Unobservable control with a foreign recipient: no
			 * signal is sent, so release the unused siginfo.
			 */
			kmem_free(sqp, sizeof (sigqueue_t));
		}
	}

	/* Denial applies only when the violating process is the recipient. */
	if ((flagaction & RCTL_LOCAL_DENY) &&
	    (recipient == NULL || recipient == p)) {
		ret |= RCT_DENY;
	}

	return (ret);
}

/*
 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t)
 *
 * Overview
 *   Take the action associated with the enforced value (as defined by
 *   rctl_get_enforced_value()) being exceeded or encountered.  Possibly perform
 *   a restricted subset of the available actions, if circumstances dictate that
 *   we cannot safely allocate memory (for a sigqueue_t) or guarantee process
 *   persistence across the duration of the function (an asynchronous action).
 *
 * Return values
 *   Actions taken, according to the rctl_test bitmask.
 *
 * Caller's context
 *   Safe to acquire rcs_lock.
 */
int
rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety)
{
	/* A NULL entity makes rctl_action_entity() derive it from p. */
	return (rctl_action_entity(hndl, rset, p, NULL, safety));
}

/*
 * int rctl_action_entity(rctl_hndl_t, rctl_set_t *, struct proc *,
 *    rctl_entity_p_t *, uint_t)
 *
 * Overview
 *   As rctl_action(), but the caller may supply the entity pointer e.  If e
 *   is NULL it is looked up from p according to the control's entity type.
 *   The global and local actions are run once on the current cursor value;
 *   unless the result is a denial, the cursor then advances to the next
 *   value (if any) and the new value is pushed through RCTLOP_SET().
 *
 * Return values
 *   Actions taken, according to the rctl_test bitmask.  RCT_NONE if the
 *   control is not present in rset on the first lookup.
 *
 *   NOTE(review): if rctl_local_action() abandoned the lock and the control
 *   is no longer found on the retry, the early return does not mask off
 *   RCT_LK_ABANDONED -- confirm callers tolerate that bit.
 *
 * Caller's context
 *   Safe to acquire rcs_lock.
 */
int
rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p,
    rctl_entity_p_t *e, uint_t safety)
{
	int ret = RCT_NONE;
	rctl_t *lrctl;
	rctl_entity_p_t e_tmp;

rctl_action_acquire:
	/*
	 * (Re)take the set lock and (re)resolve the control; we come back
	 * here when rctl_local_action() returned without rcs_lock held.
	 */
	mutex_enter(&rset->rcs_lock);
	if (rctl_set_find(rset, hndl, &lrctl) == -1) {
		mutex_exit(&rset->rcs_lock);
		return (ret);
	}

	if (e == NULL) {
		rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity,
		    p, &e_tmp);
		e = &e_tmp;
	}

	/*
	 * The actions run only on the first pass; on a retry the
	 * RCT_LK_ABANDONED bit is still set and this is skipped.
	 */
	if ((ret & RCT_LK_ABANDONED) == 0) {
		ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor);

		RCTLOP_ACTION(lrctl, p, e);

		ret |= rctl_local_action(lrctl, rset, p,
		    lrctl->rc_cursor, safety);

		if (ret & RCT_LK_ABANDONED)
			goto rctl_action_acquire;
	}

	/* Internal flag; not reported on the normal return path. */
	ret &= ~RCT_LK_ABANDONED;

	/* Not denied: step the cursor to the next value, if there is one. */
	if (!(ret & RCT_DENY) &&
	    lrctl->rc_cursor->rcv_next != NULL) {
		lrctl->rc_cursor = lrctl->rc_cursor->rcv_next;

		RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry,
		    p, lrctl->rc_cursor->rcv_value));

	}
	mutex_exit(&rset->rcs_lock);

	return (ret);
}

/*
 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t)
 *
 * Overview
 *   Increment the resource associated with the given handle, returning zero if
 *   the incremented value does not exceed the threshold for the current limit
 *   on the resource.
 *
 * Return values
 *   Actions taken, according to the rctl_test bitmask.
 *
 * Caller's context
 *   p_lock held by caller.
 */
/*ARGSUSED*/
int
rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p,
    rctl_qty_t incr, uint_t flags)
{
	/* A NULL entity makes rctl_test_entity() derive it from p. */
	return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags));
}

/*
 * int rctl_test_entity(rctl_hndl_t, rctl_set_t *, struct proc *,
 *    rctl_entity_p_t *, rctl_qty_t, uint_t)
 *
 * Overview
 *   As rctl_test(), but the caller may supply the entity pointer e.  If e is
 *   NULL it is looked up from p according to the control's entity type.
 *   While RCTLOP_TEST() reports that the increment violates the cursor
 *   value, the global and local actions are taken; if the result is not a
 *   denial and a further value exists, the cursor advances and the test is
 *   repeated against the next value.
 *
 * Return values
 *   Actions taken, according to the rctl_test bitmask.  RCT_NONE is returned
 *   immediately for p0, for a control not present in rset, and for a
 *   control that is RCTL_GLOBAL_INFINITE with a RCTL_LOCAL_MAXIMAL cursor.
 *   RCT_DENY is forced when the last value in the list is violated.
 *
 *   NOTE(review): the early returns reached after a lock-abandon retry do
 *   not mask off RCT_LK_ABANDONED -- confirm callers tolerate that bit.
 *
 * Caller's context
 *   p_lock held by caller (asserted, except for p0).
 */
int
rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p,
    rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags)
{
	rctl_t *lrctl;
	int ret = RCT_NONE;
	rctl_entity_p_t e_tmp;

	if (p == &p0) {
		/*
		 * We don't enforce rctls on the kernel itself.
		 */
		return (ret);
	}

rctl_test_acquire:
	/*
	 * Also the retry point after rctl_local_action() returned with
	 * p_lock held but rcs_lock abandoned.
	 */
	ASSERT(MUTEX_HELD(&p->p_lock));

	mutex_enter(&rset->rcs_lock);

	/*
	 * Dereference from rctl_set.  We don't enforce newly loaded controls
	 * that haven't been set on this entity (since the only valid value is
	 * the infinite system value).
	 */
	if (rctl_set_find(rset, rhndl, &lrctl) == -1) {
		mutex_exit(&rset->rcs_lock);
		return (ret);
	}

	/*
	 * This control is currently unenforced:  maximal value on control
	 * supporting infinitely available resource.
	 */
	if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) &&
	    (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) {

		mutex_exit(&rset->rcs_lock);
		return (ret);
	}

	/*
	 * If we have been called by rctl_test, look up the entity pointer
	 * from the proc pointer.
	 */
	if (e == NULL) {
		rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity,
		    p, &e_tmp);
		e = &e_tmp;
	}

	/*
	 * Get enforced rctl value and current usage.  Test the increment
	 * with the current usage against the enforced value--take action as
	 * necessary.
	 */
	while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) {
		/*
		 * Skipped on the first iteration after a retry, where
		 * RCT_LK_ABANDONED is still set from the previous pass.
		 */
		if ((ret & RCT_LK_ABANDONED) == 0) {
			ret |= rctl_global_action(lrctl, rset, p,
			    lrctl->rc_cursor);

			RCTLOP_ACTION(lrctl, p, e);

			/* flags doubles as the safety argument here. */
			ret |= rctl_local_action(lrctl, rset, p,
			    lrctl->rc_cursor, flags);

			if (ret & RCT_LK_ABANDONED)
				goto rctl_test_acquire;
		}

		ret &= ~RCT_LK_ABANDONED;

		/*
		 * Stop on an explicit denial, or when there is no higher
		 * value to move to (which is itself treated as a denial).
		 */
		if ((ret & RCT_DENY) == RCT_DENY ||
		    lrctl->rc_cursor->rcv_next == NULL) {
			ret |= RCT_DENY;
			break;
		}

		/* Advance to the next value and test again. */
		lrctl->rc_cursor = lrctl->rc_cursor->rcv_next;
		RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry,
		    p, lrctl->rc_cursor->rcv_value));
	}

	mutex_exit(&rset->rcs_lock);

	return (ret);
}

/*
 * void rctl_init(void)
 *
 * Overview
 *   Initialize the rctl subsystem, including the primordial rctls
 *   provided by the system.  New subsystem-specific rctls should _not_ be
 *   initialized here.  (Do it in your own file.)
 *
 * Return values
 *   None.
 *
 * Caller's context
 *   Safe for KM_SLEEP allocations.  Must be called prior to any process model
 *   initialization.
2850 */ 2851 void 2852 rctl_init(void) 2853 { 2854 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2855 0, NULL, NULL, NULL, NULL, NULL, 0); 2856 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2857 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2858 2859 rctl_dict = mod_hash_create_extended("rctl_dict", 2860 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2861 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2862 rctl_dict_by_name = mod_hash_create_strhash( 2863 "rctl_handles_by_name", rctl_dict_size, 2864 mod_hash_null_valdtor); 2865 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2866 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2867 2868 rctlproc_init(); 2869 } 2870 2871 /* 2872 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2873 * int chargeproc) 2874 * 2875 * Increments the amount of locked memory on a project, and 2876 * zone. If proj is non-NULL the project must be held by the 2877 * caller; if it is NULL the proj and zone of proc_t p are used. 2878 * If chargeproc is non-zero, then the charged amount is cached 2879 * on p->p_locked_mem so that the charge can be migrated when a 2880 * process changes projects. 2881 * 2882 * Return values 2883 * 0 - success 2884 * EAGAIN - attempting to increment locked memory is denied by one 2885 * or more resource entities. 
2886 */ 2887 int 2888 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2889 int chargeproc) 2890 { 2891 kproject_t *projp; 2892 zone_t *zonep; 2893 rctl_entity_p_t e; 2894 int ret = 0; 2895 2896 ASSERT(p != NULL); 2897 ASSERT(MUTEX_HELD(&p->p_lock)); 2898 if (proj != NULL) { 2899 projp = proj; 2900 zonep = proj->kpj_zone; 2901 } else { 2902 projp = p->p_task->tk_proj; 2903 zonep = p->p_zone; 2904 } 2905 2906 mutex_enter(&zonep->zone_mem_lock); 2907 2908 e.rcep_p.proj = projp; 2909 e.rcep_t = RCENTITY_PROJECT; 2910 2911 /* check for overflow */ 2912 if ((projp->kpj_data.kpd_locked_mem + inc) < 2913 projp->kpj_data.kpd_locked_mem) { 2914 ret = EAGAIN; 2915 goto out; 2916 } 2917 if (projp->kpj_data.kpd_locked_mem + inc > 2918 projp->kpj_data.kpd_locked_mem_ctl) { 2919 if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, 2920 p, &e, inc, 0) & RCT_DENY) { 2921 ret = EAGAIN; 2922 goto out; 2923 } 2924 } 2925 e.rcep_p.zone = zonep; 2926 e.rcep_t = RCENTITY_ZONE; 2927 2928 /* Check for overflow */ 2929 if ((zonep->zone_locked_mem + inc) < zonep->zone_locked_mem) { 2930 ret = EAGAIN; 2931 goto out; 2932 } 2933 if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { 2934 if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, 2935 p, &e, inc, 0) & RCT_DENY) { 2936 ret = EAGAIN; 2937 goto out; 2938 } 2939 } 2940 2941 zonep->zone_locked_mem += inc; 2942 projp->kpj_data.kpd_locked_mem += inc; 2943 if (chargeproc != 0) { 2944 p->p_locked_mem += inc; 2945 } 2946 out: 2947 mutex_exit(&zonep->zone_mem_lock); 2948 return (ret); 2949 } 2950 2951 /* 2952 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2953 * int creditproc) 2954 * 2955 * Decrements the amount of locked memory on a project and 2956 * zone. If proj is non-NULL the project must be held by the 2957 * caller; if it is NULL the proj and zone of proc_t p are used. 
2958 * If creditproc is non-zero, then the quantity of locked memory 2959 * is subtracted from p->p_locked_mem. 2960 * 2961 * Return values 2962 * none 2963 */ 2964 void 2965 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2966 int creditproc) 2967 { 2968 kproject_t *projp; 2969 zone_t *zonep; 2970 2971 if (proj != NULL) { 2972 projp = proj; 2973 zonep = proj->kpj_zone; 2974 } else { 2975 ASSERT(p != NULL); 2976 ASSERT(MUTEX_HELD(&p->p_lock)); 2977 projp = p->p_task->tk_proj; 2978 zonep = p->p_zone; 2979 } 2980 2981 mutex_enter(&zonep->zone_mem_lock); 2982 zonep->zone_locked_mem -= inc; 2983 projp->kpj_data.kpd_locked_mem -= inc; 2984 if (creditproc != 0) { 2985 ASSERT(p != NULL); 2986 ASSERT(MUTEX_HELD(&p->p_lock)); 2987 p->p_locked_mem -= inc; 2988 } 2989 mutex_exit(&zonep->zone_mem_lock); 2990 } 2991 2992 /* 2993 * rctl_incr_swap(proc_t *, zone_t *, size_t) 2994 * 2995 * Overview 2996 * Increments the swap charge on the specified zone. 2997 * 2998 * Return values 2999 * 0 on success. EAGAIN if swap increment fails due an rctl value 3000 * on the zone. 3001 * 3002 * Callers context 3003 * p_lock held on specified proc. 
3004 * swap must be even multiple of PAGESIZE 3005 */ 3006 int 3007 rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap) 3008 { 3009 rctl_entity_p_t e; 3010 3011 ASSERT(MUTEX_HELD(&proc->p_lock)); 3012 ASSERT((swap & PAGEOFFSET) == 0); 3013 e.rcep_p.zone = zone; 3014 e.rcep_t = RCENTITY_ZONE; 3015 3016 mutex_enter(&zone->zone_mem_lock); 3017 3018 /* Check for overflow */ 3019 if ((zone->zone_max_swap + swap) < zone->zone_max_swap) { 3020 mutex_exit(&zone->zone_mem_lock); 3021 return (EAGAIN); 3022 } 3023 if ((zone->zone_max_swap + swap) > 3024 zone->zone_max_swap_ctl) { 3025 3026 if (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls, 3027 proc, &e, swap, 0) & RCT_DENY) { 3028 mutex_exit(&zone->zone_mem_lock); 3029 return (EAGAIN); 3030 } 3031 } 3032 zone->zone_max_swap += swap; 3033 mutex_exit(&zone->zone_mem_lock); 3034 return (0); 3035 } 3036 3037 /* 3038 * rctl_decr_swap(zone_t *, size_t) 3039 * 3040 * Overview 3041 * Decrements the swap charge on the specified zone. 3042 * 3043 * Return values 3044 * None 3045 * 3046 * Callers context 3047 * swap must be even multiple of PAGESIZE 3048 */ 3049 void 3050 rctl_decr_swap(zone_t *zone, size_t swap) 3051 { 3052 ASSERT((swap & PAGEOFFSET) == 0); 3053 mutex_enter(&zone->zone_mem_lock); 3054 ASSERT(zone->zone_max_swap >= swap); 3055 zone->zone_max_swap -= swap; 3056 mutex_exit(&zone->zone_mem_lock); 3057 } 3058 3059 /* 3060 * rctl_incr_lofi(proc_t *, zone_t *, size_t) 3061 * 3062 * Overview 3063 * Increments the number of lofi devices for the zone. 3064 * 3065 * Return values 3066 * 0 on success. EAGAIN if increment fails due an rctl value 3067 * on the zone. 3068 * 3069 * Callers context 3070 * p_lock held on specified proc. 
3071 */ 3072 int 3073 rctl_incr_lofi(proc_t *proc, zone_t *zone, size_t incr) 3074 { 3075 rctl_entity_p_t e; 3076 3077 ASSERT(MUTEX_HELD(&proc->p_lock)); 3078 ASSERT(incr > 0); 3079 3080 e.rcep_p.zone = zone; 3081 e.rcep_t = RCENTITY_ZONE; 3082 3083 mutex_enter(&zone->zone_rctl_lock); 3084 3085 /* Check for overflow */ 3086 if ((zone->zone_max_lofi + incr) < zone->zone_max_lofi) { 3087 mutex_exit(&zone->zone_rctl_lock); 3088 return (EAGAIN); 3089 } 3090 if ((zone->zone_max_lofi + incr) > zone->zone_max_lofi_ctl) { 3091 if (rctl_test_entity(rc_zone_max_lofi, zone->zone_rctls, 3092 proc, &e, incr, 0) & RCT_DENY) { 3093 mutex_exit(&zone->zone_rctl_lock); 3094 return (EAGAIN); 3095 } 3096 } 3097 zone->zone_max_lofi += incr; 3098 mutex_exit(&zone->zone_rctl_lock); 3099 return (0); 3100 } 3101 3102 /* 3103 * rctl_decr_lofi(zone_t *, size_t) 3104 * 3105 * Overview 3106 * Decrements the number of lofi devices for the zone. 3107 */ 3108 void 3109 rctl_decr_lofi(zone_t *zone, size_t decr) 3110 { 3111 mutex_enter(&zone->zone_rctl_lock); 3112 ASSERT(zone->zone_max_lofi >= decr); 3113 zone->zone_max_lofi -= decr; 3114 mutex_exit(&zone->zone_rctl_lock); 3115 } 3116 3117 /* 3118 * Create resource kstat 3119 */ 3120 static kstat_t * 3121 rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class, 3122 uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid) 3123 { 3124 kstat_t *ksp = NULL; 3125 char name[KSTAT_STRLEN]; 3126 3127 (void) snprintf(name, KSTAT_STRLEN, "%s_%d", ks_name, ks_instance); 3128 3129 if ((ksp = kstat_create_zone("caps", ks_zoneid, 3130 name, ks_class, ks_type, 3131 ks_ndata, ks_flags, ks_zoneid)) != NULL) { 3132 if (ks_zoneid != GLOBAL_ZONEID) 3133 kstat_zone_add(ksp, GLOBAL_ZONEID); 3134 } 3135 return (ksp); 3136 } 3137 3138 /* 3139 * Create zone-specific resource kstat 3140 */ 3141 kstat_t * 3142 rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type, 3143 uint_t ks_ndata, uchar_t ks_flags) 3144 { 3145 char 
name[KSTAT_STRLEN]; 3146 3147 (void) snprintf(name, KSTAT_STRLEN, "%s_zone", ks_name); 3148 3149 return (rctl_kstat_create_common(name, zone->zone_id, "zone_caps", 3150 ks_type, ks_ndata, ks_flags, zone->zone_id)); 3151 } 3152 3153 /* 3154 * Create project-specific resource kstat 3155 */ 3156 kstat_t * 3157 rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type, 3158 uint_t ks_ndata, uchar_t ks_flags) 3159 { 3160 char name[KSTAT_STRLEN]; 3161 3162 (void) snprintf(name, KSTAT_STRLEN, "%s_project", ks_name); 3163 3164 return (rctl_kstat_create_common(name, kpj->kpj_id, "project_caps", 3165 ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid)); 3166 } 3167