/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*
 * Contracts
 * ---------
 *
 * Contracts are a primitive which enriches the relationships between
 * processes and system resources.  The primary purpose of contracts is
 * to provide a means for the system to negotiate the departure from a
 * binding relationship (e.g. pages locked in memory or a thread bound
 * to a processor), but they can also be used as a purely asynchronous
 * error reporting mechanism, as they are with process contracts.
 *
 * More information on how one interfaces with contracts and what
 * contracts can do for you can be found in:
 *   PSARC 2003/193 Solaris Contracts
 *   PSARC 2004/460 Contracts addendum
 *
 * This file contains the core contracts framework.  By itself it is
 * useless: it depends on the contracts filesystem (ctfs) to provide an
 * interface to user processes and individual contract types to
 * implement the process/resource relationships.
 *
 * Data structure overview
 * -----------------------
 *
 * A contract is represented by a contract_t, which itself points to an
 * encapsulating contract-type specific contract object.  A contract_t
 * contains the contract's static identity (including its terms), its
 * linkage to various bookkeeping structures, the contract-specific
 * event queue, and a reference count.
 *
 * A contract template is represented by a ct_template_t, which, like a
 * contract, points to an encapsulating contract-type specific template
 * object.  A ct_template_t contains the template's terms.
 *
 * An event queue is represented by a ct_equeue_t, and consists of a
 * list of events, a list of listeners, and a list of listeners who are
 * waiting for new events (affectionately referred to as "tail
 * listeners").  There are three queue types, defined by ct_listnum_t
 * (an enum).  An event may be on one of each type of queue
 * simultaneously; the list linkage used by a queue is determined by
 * its type.
 *
 * An event is represented by a ct_kevent_t, which contains mostly
 * static event data (e.g. id, payload).  It also has an array of
 * ct_member_t structures, each of which contains a list_node_t and
 * represents the event's linkage in a specific event queue.
 *
 * Each open of an event endpoint results in the creation of a new
 * listener, represented by a ct_listener_t.  In addition to linkage
 * into the aforementioned lists in the event_queue, a ct_listener_t
 * contains a pointer to the ct_kevent_t it is currently positioned at
 * as well as a set of status flags and other administrative data.
 *
 * Each process has a list of contracts it owns, p_ct_held; a pointer
 * to the process contract it is a member of, p_ct_process; the linkage
 * for that membership, p_ct_member; and an array of event queue
 * structures representing the process bundle queues.
 *
 * Each LWP has an array of its active templates, lwp_ct_active; and
 * the most recently created contracts, lwp_ct_latest.
 *
 * A process contract has a list of member processes and a list of
 * inherited contracts.
 *
 * There is a system-wide list of all contracts, as well as per-type
 * lists of contracts.
 *
 * Lock ordering overview
 * ----------------------
 *
 * Locks at the top are taken first:
 *
 *                     ct_evtlock
 *                     regent ct_lock
 *                     member ct_lock
 *                     pidlock
 *                     p_lock
 *   contract ctq_lock           contract_lock
 *   pbundle ctq_lock
 *   cte_lock
 *                     ct_reflock
 *
 * contract_lock and ctq_lock/cte_lock are not currently taken at the
 * same time.
 *
 * Reference counting and locking
 * ------------------------------
 *
 * A contract has a reference count, protected by ct_reflock.
 * (ct_reflock is also used in a couple of other places where atomic
 * access to a variable is needed in an innermost context).  A process
 * maintains a hold on each contract it owns.  A process contract has a
 * hold on each contract it has inherited.  Each event has a hold on
 * the contract which generated it.  Process contract templates have
 * holds on the contracts referred to by their transfer terms.  CTFS
 * contract directory nodes have holds on contracts.  Lastly, various
 * code paths may temporarily take holds on contracts to prevent them
 * from disappearing while other processing is going on.  It is
 * important to note that the global contract lists do not hold
 * references on contracts; a contract is removed from these structures
 * atomically with the release of its last reference.
 *
 * At a given point in time, a contract can either be owned by a
 * process, inherited by a regent process contract, or orphaned.  A
 * contract_t's owner and regent pointers, ct_owner and ct_regent, are
 * protected by its ct_lock.  The linkage in the holder's (holder =
 * owner or regent) list of contracts, ct_ctlist, is protected by
 * whatever lock protects the holder's data structure.  In order for
 * these two directions to remain consistent, changing the holder of a
 * contract requires that both locks be held.
 *
 * Events also have reference counts.  There is one hold on an event
 * per queue it is present on, in addition to those needed for the
 * usual sundry reasons.  Individual listeners are associated with
 * specific queues, and increase a queue-specific reference count
 * stored in the ct_member_t structure.
 *
 * The dynamic contents of an event (reference count and flags) are
 * protected by its cte_lock, while the contents of the embedded
 * ct_member_t structures are protected by the locks of the queues they
 * are linked into.  A ct_listener_t's contents are also protected by
 * its event queue's ctq_lock.
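 *
 * As an illustrative sketch of the holder-change rule above (this is
 * the pattern contract_abandon and contract_adopt below follow, not an
 * additional interface): the regent's ct_lock is taken before the
 * member's ct_lock, and the owning process's p_lock is taken after
 * both, before the owner/regent pointers and ct_ctlist linkage are
 * updated:
 *
 *	mutex_enter(&parent->ct_lock);		(regent)
 *	mutex_enter(&ct->ct_lock);		(member)
 *	mutex_enter(&p->p_lock);		(holder's list lock)
 *	... update ct_owner/ct_regent and the holder's contract list ...
 *	mutex_exit(&p->p_lock);
 *	mutex_exit(&ct->ct_lock);
 *	mutex_exit(&parent->ct_lock);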
 *
 * Resource controls
 * -----------------
 *
 * Control:      project.max-contracts (rc_project_contract)
 * Description:  Maximum number of contracts allowed a project.
 *
 *   When a contract is created, the project's allocation is tested and
 *   (assuming success) increased.  When the last reference to a
 *   contract is released, the creating project's allocation is
 *   decreased.
 */

#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/id_space.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/ctfs.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/dditypes.h>
#include <sys/contract/device_impl.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/model.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/task.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

extern rctl_hndl_t rc_project_contract;

static id_space_t	*contract_ids;
static avl_tree_t	contract_avl;
static kmutex_t		contract_lock;

int ct_ntypes = CTT_MAXTYPE;
static ct_type_t *ct_types_static[CTT_MAXTYPE];
ct_type_t **ct_types = ct_types_static;
int ct_debug;

static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
static void cte_queue_drain(ct_equeue_t *, int);
static void cte_trim(ct_equeue_t *, contract_t *);
static void cte_copy(ct_equeue_t *, ct_equeue_t *);

/*
 * contract_compar
 *
 * A contract comparator which sorts on contract ID.
 */
int
contract_compar(const void *x, const void *y)
{
	const contract_t *ct1 = x;
	const contract_t *ct2 = y;

	if (ct1->ct_id < ct2->ct_id)
		return (-1);
	if (ct1->ct_id > ct2->ct_id)
		return (1);
	return (0);
}

/*
 * contract_init
 *
 * Initializes the contract subsystem, the specific contract types, and
 * process 0.
 */
void
contract_init(void)
{
	/*
	 * Initialize contract subsystem.
	 */
	contract_ids = id_space_create("contracts", 1, INT_MAX);
	avl_create(&contract_avl, contract_compar, sizeof (contract_t),
	    offsetof(contract_t, ct_ctavl));
	mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Initialize contract types.
	 */
	contract_process_init();
	contract_device_init();

	/*
	 * Initialize p0/lwp0 contract state.
	 */
	avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
	    offsetof(contract_t, ct_ctlist));
}

/*
 * contract_dtor
 *
 * Performs basic destruction of the common portions of a contract.
 * Called from the failure path of contract_ctor and from
 * contract_rele.
 */
static void
contract_dtor(contract_t *ct)
{
	cte_queue_destroy(&ct->ct_events);
	list_destroy(&ct->ct_vnodes);
	mutex_destroy(&ct->ct_reflock);
	mutex_destroy(&ct->ct_lock);
	mutex_destroy(&ct->ct_evtlock);
}

/*
 * contract_ctor
 *
 * Called by a contract type to initialize a contract.  Fails if the
 * max-contract resource control would have been exceeded.  After a
 * successful call to contract_ctor, the contract is unlocked and
 * visible in all namespaces; any type-specific initialization should
 * be completed before calling contract_ctor.  Returns 0 on success.
 *
 * Because not all callers can tolerate failure, a 0 value for canfail
 * instructs contract_ctor to ignore the project.max-contracts resource
 * control.  Obviously, this "out" should only be employed by callers
 * who are sufficiently constrained in other ways (e.g. newproc).
 */
int
contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
    ctflags_t flags, proc_t *author, int canfail)
{
	avl_index_t where;
	klwp_t *curlwp = ttolwp(curthread);

	ASSERT(author == curproc);

	mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_id = id_alloc(contract_ids);

	cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
	list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
	    offsetof(contract_vnode_t, ctv_node));

	/*
	 * Instance data
	 */
	ct->ct_ref = 2;		/* one for the holder, one for "latest" */
	ct->ct_cuid = crgetuid(CRED());
	ct->ct_type = type;
	ct->ct_data = data;
	gethrestime(&ct->ct_ctime);
	ct->ct_state = CTS_OWNED;
	ct->ct_flags = flags;
	ct->ct_regent = author->p_ct_process ?
	    &author->p_ct_process->conp_contract : NULL;
	ct->ct_ev_info = tmpl->ctmpl_ev_info;
	ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
	ct->ct_cookie = tmpl->ctmpl_cookie;
	ct->ct_owner = author;
	ct->ct_ntime.ctm_total = -1;
	ct->ct_qtime.ctm_total = -1;
	ct->ct_nevent = NULL;

	/*
	 * Test project.max-contracts.
	 */
	mutex_enter(&author->p_lock);
	mutex_enter(&contract_lock);
	if (canfail && rctl_test(rc_project_contract,
	    author->p_task->tk_proj->kpj_rctls, author, 1,
	    RCA_SAFE) & RCT_DENY) {
		id_free(contract_ids, ct->ct_id);
		mutex_exit(&contract_lock);
		mutex_exit(&author->p_lock);
		ct->ct_events.ctq_flags |= CTQ_DEAD;
		contract_dtor(ct);
		return (1);
	}
	ct->ct_proj = author->p_task->tk_proj;
	ct->ct_proj->kpj_data.kpd_contract++;
	(void) project_hold(ct->ct_proj);
	mutex_exit(&contract_lock);

	/*
	 * Insert into holder's avl of contracts.
	 * We use an avl not because order is important, but because
	 * readdir of /proc/contracts requires we be able to use a
	 * scalar as an index into the process's list of contracts
	 */
	ct->ct_zoneid = author->p_zone->zone_id;
	ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
	VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
	avl_insert(&author->p_ct_held, ct, where);
	mutex_exit(&author->p_lock);

	/*
	 * Insert into global contract AVL
	 */
	mutex_enter(&contract_lock);
	VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
	avl_insert(&contract_avl, ct, where);
	mutex_exit(&contract_lock);

	/*
	 * Insert into type AVL
	 */
	mutex_enter(&type->ct_type_lock);
	VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
	avl_insert(&type->ct_type_avl, ct, where);
	type->ct_type_timestruc = ct->ct_ctime;
	mutex_exit(&type->ct_type_lock);

	if (curlwp->lwp_ct_latest[type->ct_type_index])
		contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
	curlwp->lwp_ct_latest[type->ct_type_index] = ct;

	return (0);
}

/*
 * contract_rele
 *
 * Releases a reference to a contract.  If the caller had the last
 * reference, the contract is removed from all namespaces, its
 * allocation against the max-contracts resource control is released,
 * and the contract type's free entry point is invoked for any
 * type-specific deconstruction and to (presumably) free the object.
 */
void
contract_rele(contract_t *ct)
{
	uint64_t nref;

	mutex_enter(&ct->ct_reflock);
	ASSERT(ct->ct_ref > 0);
	nref = --ct->ct_ref;
	mutex_exit(&ct->ct_reflock);
	if (nref == 0) {
		/*
		 * ct_owner is cleared when it drops its reference.
		 */
		ASSERT(ct->ct_owner == NULL);
		ASSERT(ct->ct_evcnt == 0);

		/*
		 * Remove from global contract AVL
		 */
		mutex_enter(&contract_lock);
		avl_remove(&contract_avl, ct);
		mutex_exit(&contract_lock);

		/*
		 * Remove from type AVL
		 */
		mutex_enter(&ct->ct_type->ct_type_lock);
		avl_remove(&ct->ct_type->ct_type_avl, ct);
		mutex_exit(&ct->ct_type->ct_type_lock);

		/*
		 * Release the contract's ID
		 */
		id_free(contract_ids, ct->ct_id);

		/*
		 * Release project hold
		 */
		mutex_enter(&contract_lock);
		ct->ct_proj->kpj_data.kpd_contract--;
		project_rele(ct->ct_proj);
		mutex_exit(&contract_lock);

		/*
		 * Free the contract
		 */
		contract_dtor(ct);
		ct->ct_type->ct_type_ops->contop_free(ct);
	}
}

/*
 * contract_hold
 *
 * Adds a reference to a contract.
 */
void
contract_hold(contract_t *ct)
{
	mutex_enter(&ct->ct_reflock);
	ASSERT(ct->ct_ref < UINT64_MAX);
	ct->ct_ref++;
	mutex_exit(&ct->ct_reflock);
}

/*
 * contract_getzuniqid
 *
 * Get a contract's zone unique ID.  Needed because 64-bit reads and
 * writes aren't atomic on x86.  Since there are contexts where we are
 * unable to take ct_lock, we instead use ct_reflock; in actuality any
 * lock would do.
 */
uint64_t
contract_getzuniqid(contract_t *ct)
{
	uint64_t zuniqid;

	mutex_enter(&ct->ct_reflock);
	zuniqid = ct->ct_mzuniqid;
	mutex_exit(&ct->ct_reflock);

	return (zuniqid);
}

/*
 * contract_setzuniqid
 *
 * Sets a contract's zone unique ID.  See contract_getzuniqid.
 */
void
contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
{
	mutex_enter(&ct->ct_reflock);
	ct->ct_mzuniqid = zuniqid;
	mutex_exit(&ct->ct_reflock);
}

/*
 * contract_abandon
 *
 * Abandons the specified contract.  If "explicit" is clear, the
 * contract was implicitly abandoned (by process exit) and should be
 * inherited if its terms allow it and its owner was a member of a
 * regent contract.  Otherwise, the contract type's abandon entry point
 * is invoked to either destroy or orphan the contract.
 */
int
contract_abandon(contract_t *ct, proc_t *p, int explicit)
{
	ct_equeue_t *q = NULL;
	contract_t *parent = &p->p_ct_process->conp_contract;
	int inherit = 0;

	VERIFY(p == curproc);

	mutex_enter(&ct->ct_lock);

	/*
	 * Multiple contract locks are taken contract -> subcontract.
	 * Check if the contract will be inherited so we can acquire
	 * all the necessary locks before making sensitive changes.
	 */
	if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
	    contract_process_accept(parent)) {
		mutex_exit(&ct->ct_lock);
		mutex_enter(&parent->ct_lock);
		mutex_enter(&ct->ct_lock);
		inherit = 1;
	}

	if (ct->ct_owner != p) {
		mutex_exit(&ct->ct_lock);
		if (inherit)
			mutex_exit(&parent->ct_lock);
		return (EINVAL);
	}

	mutex_enter(&p->p_lock);
	if (explicit)
		avl_remove(&p->p_ct_held, ct);
	ct->ct_owner = NULL;
	mutex_exit(&p->p_lock);

	/*
	 * Since we can't call cte_trim with the contract lock held,
	 * we grab the queue pointer here.
	 */
	if (p->p_ct_equeue)
		q = p->p_ct_equeue[ct->ct_type->ct_type_index];

	/*
	 * contop_abandon may destroy the contract so we rely on it to
	 * drop ct_lock.  We retain a reference on the contract so that
	 * the cte_trim which follows functions properly.  Even though
	 * cte_trim doesn't dereference the contract pointer, it is
	 * still necessary to retain a reference to the contract so
	 * that we don't trim events which are sent by a subsequently
	 * allocated contract infortuitously located at the same address.
	 */
	contract_hold(ct);

	if (inherit) {
		ct->ct_state = CTS_INHERITED;
		VERIFY(ct->ct_regent == parent);
		contract_process_take(parent, ct);

		/*
		 * We are handing off the process's reference to the
		 * parent contract.  For this reason, the order in
		 * which we drop the contract locks is also important.
		 */
		mutex_exit(&ct->ct_lock);
		mutex_exit(&parent->ct_lock);
	} else {
		ct->ct_regent = NULL;
		ct->ct_type->ct_type_ops->contop_abandon(ct);
	}

	/*
	 * ct_lock has been dropped; we can safely trim the event
	 * queue now.
	 */
	if (q) {
		mutex_enter(&q->ctq_lock);
		cte_trim(q, ct);
		mutex_exit(&q->ctq_lock);
	}

	contract_rele(ct);

	return (0);
}

/*
 * contract_newct
 *
 * Invokes the contract type's contop_newct entry point for the
 * specified contract.
 */
int
contract_newct(contract_t *ct)
{
	return (ct->ct_type->ct_type_ops->contop_newct(ct));
}

/*
 * contract_adopt
 *
 * Adopts a contract.  After a successful call to this routine, the
 * previously inherited contract will belong to the calling process,
 * and its events will have been appended to its new owner's process
 * bundle queue.
 */
int
contract_adopt(contract_t *ct, proc_t *p)
{
	avl_index_t where;
	ct_equeue_t *q;
	contract_t *parent;

	ASSERT(p == curproc);

	/*
	 * Ensure the process has an event queue.  Checked by ASSERTs
	 * below.
	 */
	(void) contract_type_pbundle(ct->ct_type, p);

	mutex_enter(&ct->ct_lock);
	parent = ct->ct_regent;
	if (ct->ct_state != CTS_INHERITED ||
	    &p->p_ct_process->conp_contract != parent ||
	    p->p_zone->zone_uniqid != ct->ct_czuniqid) {
		mutex_exit(&ct->ct_lock);
		return (EINVAL);
	}

	/*
	 * Multiple contract locks are taken contract -> subcontract.
	 */
	mutex_exit(&ct->ct_lock);
	mutex_enter(&parent->ct_lock);
	mutex_enter(&ct->ct_lock);

	/*
	 * It is possible that the contract was adopted by someone else
	 * while its lock was dropped.  It isn't possible for the
	 * contract to have been inherited by a different regent
	 * contract.
	 */
	if (ct->ct_state != CTS_INHERITED) {
		mutex_exit(&parent->ct_lock);
		mutex_exit(&ct->ct_lock);
		return (EBUSY);
	}
	ASSERT(ct->ct_regent == parent);

	ct->ct_state = CTS_OWNED;

	contract_process_adopt(ct, p);

	mutex_enter(&p->p_lock);
	ct->ct_owner = p;
	VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
	avl_insert(&p->p_ct_held, ct, where);
	mutex_exit(&p->p_lock);

	ASSERT(ct->ct_owner->p_ct_equeue);
	ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
	q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
	cte_copy(&ct->ct_events, q);
	mutex_exit(&ct->ct_lock);

	return (0);
}

/*
 * contract_ack
 *
 * Acknowledges receipt of a critical event.
 */
int
contract_ack(contract_t *ct, uint64_t evid, int ack)
{
	ct_kevent_t *ev;
	list_t *queue = &ct->ct_events.ctq_events;
	int error = ESRCH;
	int nego = 0;
	uint_t evtype;

	ASSERT(ack == CT_ACK || ack == CT_NACK);

	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);
	/*
	 * We are probably ACKing something near the head of the queue.
	 */
	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
		if (ev->cte_id == evid) {
			if (ev->cte_flags & CTE_NEG)
				nego = 1;
			else if (ack == CT_NACK)
				break;
			if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
				ev->cte_flags |= CTE_ACK;
				ct->ct_evcnt--;
				evtype = ev->cte_type;
				error = 0;
			}
			break;
		}
	}
	mutex_exit(&ct->ct_events.ctq_lock);
	mutex_exit(&ct->ct_lock);

	/*
	 * Not all critical events are negotiation events, however
	 * every negotiation event is a critical event.  NEGEND events
	 * are critical events but are not negotiation events.
	 */
	if (error || !nego)
		return (error);

	if (ack == CT_ACK)
		error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
	else
		error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);

	return (error);
}

/*ARGSUSED*/
int
contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
	    ct->ct_id);
	return (ENOSYS);
}

/*ARGSUSED*/
int
contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
	cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
	    ct->ct_id);
	return (ENOSYS);
}

/*ARGSUSED*/
int
contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
{
	return (ERANGE);
}

/*
 * contract_qack
 *
 * Asks that negotiations be extended by another time quantum.
 */
int
contract_qack(contract_t *ct, uint64_t evid)
{
	ct_kevent_t *ev;
	list_t *queue = &ct->ct_events.ctq_events;
	int nego = 0;
	uint_t evtype;

	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);

	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
		if (ev->cte_id == evid) {
			if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
				evtype = ev->cte_type;
				nego = 1;
			}
			break;
		}
	}
	mutex_exit(&ct->ct_events.ctq_lock);
	mutex_exit(&ct->ct_lock);

	/*
	 * Only a negotiated event (which is by definition also a critical
	 * event) which has not yet been acknowledged can provide
	 * time quanta to a negotiating owner process.
	 */
	if (!nego)
		return (ESRCH);

	return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
}

/*
 * contract_orphan
 *
 * Icky-poo.  This is a process-contract special, used to ACK all
 * critical messages when a contract is orphaned.
 */
void
contract_orphan(contract_t *ct)
{
	ct_kevent_t *ev;
	list_t *queue = &ct->ct_events.ctq_events;

	ASSERT(MUTEX_HELD(&ct->ct_lock));
	ASSERT(ct->ct_state != CTS_ORPHAN);

	mutex_enter(&ct->ct_events.ctq_lock);
	ct->ct_state = CTS_ORPHAN;
	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
		if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
			ev->cte_flags |= CTE_ACK;
			ct->ct_evcnt--;
		}
	}
	mutex_exit(&ct->ct_events.ctq_lock);

	ASSERT(ct->ct_evcnt == 0);
}

/*
 * contract_destroy
 *
 * Explicit contract destruction.  Called when contract is empty.
 * The contract will actually stick around until all of its events are
 * removed from the bundle and process bundle queues, and all fds
 * which refer to it are closed.  See contract_dtor if you are looking
 * for what destroys the contract structure.
 */
void
contract_destroy(contract_t *ct)
{
	ASSERT(MUTEX_HELD(&ct->ct_lock));
	ASSERT(ct->ct_state != CTS_DEAD);
	ASSERT(ct->ct_owner == NULL);

	ct->ct_state = CTS_DEAD;
	cte_queue_drain(&ct->ct_events, 1);
	mutex_exit(&ct->ct_lock);
	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
	cte_trim(&ct->ct_type->ct_type_events, ct);
	mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
	mutex_enter(&ct->ct_lock);
	ct->ct_type->ct_type_ops->contop_destroy(ct);
	mutex_exit(&ct->ct_lock);
	contract_rele(ct);
}

/*
 * contract_vnode_get
 *
 * Obtains the contract directory vnode for this contract, if there is
 * one.  The caller must VN_RELE the vnode when they are through using
 * it.
 */
vnode_t *
contract_vnode_get(contract_t *ct, vfs_t *vfsp)
{
	contract_vnode_t *ctv;
	vnode_t *vp = NULL;

	mutex_enter(&ct->ct_lock);
	for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
	    ctv = list_next(&ct->ct_vnodes, ctv))
		if (ctv->ctv_vnode->v_vfsp == vfsp) {
			vp = ctv->ctv_vnode;
			VN_HOLD(vp);
			break;
		}
	mutex_exit(&ct->ct_lock);
	return (vp);
}

/*
 * contract_vnode_set
 *
 * Sets the contract directory vnode for this contract.  We don't hold
 * a reference on the vnode because we don't want to prevent it from
 * being freed.  The vnode's inactive entry point will take care of
 * notifying us when it should be removed.
 */
void
contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
{
	mutex_enter(&ct->ct_lock);
	ctv->ctv_vnode = vnode;
	list_insert_head(&ct->ct_vnodes, ctv);
	mutex_exit(&ct->ct_lock);
}

/*
 * contract_vnode_clear
 *
 * Removes this vnode as the contract directory vnode for this
 * contract.  Called from a contract directory's inactive entry point,
 * this may return 0 indicating that the vnode gained another reference
 * because of a simultaneous call to contract_vnode_get.
 */
int
contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
{
	vnode_t *vp = ctv->ctv_vnode;
	int result;

	mutex_enter(&ct->ct_lock);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		list_remove(&ct->ct_vnodes, ctv);
		result = 1;
	} else {
		VN_RELE_LOCKED(vp);
		result = 0;
	}
	mutex_exit(&vp->v_lock);
	mutex_exit(&ct->ct_lock);

	return (result);
}

/*
 * contract_exit
 *
 * Abandons all contracts held by process p, and drains process p's
 * bundle queues.  Called on process exit.
 */
void
contract_exit(proc_t *p)
{
	contract_t *ct;
	void *cookie = NULL;
	int i;

	ASSERT(p == curproc);

	/*
	 * Abandon held contracts.  contract_abandon knows enough not
	 * to remove the contract from the list a second time.  We are
	 * exiting, so no locks are needed here.  But because
	 * contract_abandon will take p_lock, we need to make sure we
	 * aren't holding it.
	 */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
		VERIFY(contract_abandon(ct, p, 0) == 0);

	/*
	 * Drain pbundles.  Because a process bundle queue could have
	 * been passed to another process, they may not be freed right
	 * away.
	 */
	if (p->p_ct_equeue) {
		for (i = 0; i < CTT_MAXTYPE; i++)
			if (p->p_ct_equeue[i])
				cte_queue_drain(p->p_ct_equeue[i], 0);
		kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
	}
}

/*
 * get_time_left
 *
 * Returns the number of seconds remaining in the timer described by
 * t, or -1 if the timer isn't running.
 */
static int
get_time_left(struct ct_time *t)
{
	clock_t ticks_elapsed;
	int secs_elapsed;

	if (t->ctm_total == -1)
		return (-1);

	ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
	secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
	return (secs_elapsed > 0 ? secs_elapsed : 0);
}

/*
 * contract_status_common
 *
 * Populates a ct_status structure.  Used by contract types in their
 * status entry points and ctfs when only common information is
 * requested.
 */
void
contract_status_common(contract_t *ct, zone_t *zone, void *status,
    model_t model)
{
	STRUCT_HANDLE(ct_status, lstatus);

	STRUCT_SET_HANDLE(lstatus, model, status);
	ASSERT(MUTEX_HELD(&ct->ct_lock));
	if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
	    zone->zone_uniqid == ct->ct_czuniqid) {
		zone_t *czone;
		zoneid_t zoneid = -1;

		/*
		 * Contracts don't have holds on the zones they were
		 * created by.  If the contract's zone no longer
		 * exists, we say its zoneid is -1.
		 */
		if (zone->zone_uniqid == ct->ct_czuniqid ||
		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
			zoneid = ct->ct_zoneid;
		} else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
			if (czone->zone_uniqid == ct->ct_mzuniqid)
				zoneid = ct->ct_zoneid;
			zone_rele(czone);
		}

		STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
		STRUCT_FSET(lstatus, ctst_holder,
		    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
		    (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
		STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
	} else {
		/*
		 * We are looking at a contract which was created by a
		 * process outside of our zone.  We provide fake zone,
		 * holder, and state information.
		 */

		STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
		/*
		 * Since "zone" can't disappear until the calling ctfs
		 * is unmounted, zone_zsched must be valid.
		 */
		STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
		    zone->zone_zsched->p_pid : 0);
		STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
		    CTS_OWNED : ct->ct_state);
	}
	STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
	STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
	STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
	STRUCT_FSET(lstatus, ctst_nevid,
	    ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
	STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
	STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
	STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
	STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
	STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
}

/*
 * contract_checkcred
 *
 * Determines if the specified contract is owned by a process with the
 * same effective uid as the specified credential.  The caller must
 * ensure that the uid spaces are the same.  Returns 1 on success.
 */
static int
contract_checkcred(contract_t *ct, const cred_t *cr)
{
	proc_t *p;
	int fail = 1;

	mutex_enter(&ct->ct_lock);
	if ((p = ct->ct_owner) != NULL) {
		mutex_enter(&p->p_crlock);
		fail = crgetuid(cr) != crgetuid(p->p_cred);
		mutex_exit(&p->p_crlock);
	}
	mutex_exit(&ct->ct_lock);

	return (!fail);
}

/*
 * contract_owned
 *
 * Determines if the specified credential can view an event generated
 * by the specified contract.  If locked is set, the contract's ct_lock
 * is held and the caller will need to do additional work to determine
 * if they truly can see the event.  Returns 1 on success.
 */
int
contract_owned(contract_t *ct, const cred_t *cr, int locked)
{
	int owner, cmatch, zmatch;
	uint64_t zuniqid, mzuniqid;
	uid_t euid;

	ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));

	zuniqid = curproc->p_zone->zone_uniqid;
	mzuniqid = contract_getzuniqid(ct);
	euid = crgetuid(cr);

	/*
	 * owner: we own the contract
	 * cmatch: we are in the creator's (and holder's) zone and our
	 *   uid matches the creator's or holder's
	 * zmatch: we are in the effective zone of a contract created
	 *   in the global zone, and our uid matches that of the
	 *   virtualized holder's (zsched/kcred)
	 */
	owner = (ct->ct_owner == curproc);
	cmatch = (zuniqid == ct->ct_czuniqid) &&
	    ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
	zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
	    (crgetuid(kcred) == euid);

	return (owner || cmatch || zmatch);
}


/*
 * contract_type_init
 *
 * Called by contract types to register themselves with the contracts
 * framework.
 */
ct_type_t *
contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
    ct_f_default_t *dfault)
{
	ct_type_t *result;

	ASSERT(type < CTT_MAXTYPE);

	result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);

	mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
	    offsetof(contract_t, ct_cttavl));
	cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
	result->ct_type_name = name;
	result->ct_type_ops = ops;
	result->ct_type_default = dfault;
	result->ct_type_evid = 0;
	gethrestime(&result->ct_type_timestruc);
	result->ct_type_index = type;

	ct_types[type] = result;

	return (result);
}

/*
 * contract_type_count
 *
 * Obtains the number of contracts of a particular type.
 */
int
contract_type_count(ct_type_t *type)
{
	ulong_t count;

	mutex_enter(&type->ct_type_lock);
	count = avl_numnodes(&type->ct_type_avl);
	mutex_exit(&type->ct_type_lock);

	return (count);
}

/*
 * contract_type_max
 *
 * Obtains the maximum contract id of a particular type.
 */
ctid_t
contract_type_max(ct_type_t *type)
{
	contract_t *ct;
	ctid_t res;

	mutex_enter(&type->ct_type_lock);
	ct = avl_last(&type->ct_type_avl);
	res = ct ? ct->ct_id : -1;
	mutex_exit(&type->ct_type_lock);

	return (res);
}

/*
 * contract_max
 *
 * Obtains the maximum contract id.
 */
ctid_t
contract_max(void)
{
	contract_t *ct;
	ctid_t res;

	mutex_enter(&contract_lock);
	ct = avl_last(&contract_avl);
	res = ct ? ct->ct_id : -1;
	mutex_exit(&contract_lock);

	return (res);
}

/*
 * contract_lookup_common
 *
 * Common code for contract_lookup and contract_type_lookup.  Takes a
 * pointer to an AVL tree to search in.  Should be called with the
 * appropriate tree-protecting lock held (unfortunately unassertable).
 */
static ctid_t
contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
{
	contract_t template, *ct;
	avl_index_t where;
	ctid_t res;

	template.ct_id = current;
	ct = avl_find(tree, &template, &where);
	if (ct == NULL)
		ct = avl_nearest(tree, where, AVL_AFTER);
	if (zuniqid != GLOBAL_ZONEUNIQID)
		while (ct && (contract_getzuniqid(ct) != zuniqid))
			ct = AVL_NEXT(tree, ct);
	res = ct ? ct->ct_id : -1;

	return (res);
}

/*
 * contract_type_lookup
 *
 * Returns the next type contract after the specified id, visible from
 * the specified zone.
 */
ctid_t
contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
{
	ctid_t res;

	mutex_enter(&type->ct_type_lock);
	res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
	mutex_exit(&type->ct_type_lock);

	return (res);
}

/*
 * contract_lookup
 *
 * Returns the next contract after the specified id, visible from the
 * specified zone.
 */
ctid_t
contract_lookup(uint64_t zuniqid, ctid_t current)
{
	ctid_t res;

	mutex_enter(&contract_lock);
	res = contract_lookup_common(&contract_avl, zuniqid, current);
	mutex_exit(&contract_lock);

	return (res);
}

/*
 * contract_plookup
 *
 * Returns the next contract held by process p after the specified id,
 * visible from the specified zone.  Made complicated by the fact that
 * contracts visible in a zone but held by processes outside of the
 * zone need to appear as being held by zsched to zone members.
 */
ctid_t
contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
{
	contract_t template, *ct;
	avl_index_t where;
	ctid_t res;

	template.ct_id = current;
	if (zuniqid != GLOBAL_ZONEUNIQID &&
	    (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
		/* This is inelegant. */
		mutex_enter(&contract_lock);
		ct = avl_find(&contract_avl, &template, &where);
		if (ct == NULL)
			ct = avl_nearest(&contract_avl, where, AVL_AFTER);
		while (ct && !(ct->ct_state < CTS_ORPHAN &&
		    contract_getzuniqid(ct) == zuniqid &&
		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
			ct = AVL_NEXT(&contract_avl, ct);
		res = ct ? ct->ct_id : -1;
		mutex_exit(&contract_lock);
	} else {
		mutex_enter(&p->p_lock);
		ct = avl_find(&p->p_ct_held, &template, &where);
		if (ct == NULL)
			ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
		res = ct ? ct->ct_id : -1;
		mutex_exit(&p->p_lock);
	}

	return (res);
}

/*
 * contract_ptr_common
 *
 * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
 * to an AVL tree to search in.  Should be called with the appropriate
 * tree-protecting lock held (unfortunately unassertable).
 */
static contract_t *
contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
{
	contract_t template, *ct;

	template.ct_id = id;
	ct = avl_find(tree, &template, NULL);
	if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
	    contract_getzuniqid(ct) != zuniqid)) {
		return (NULL);
	}

	/*
	 * Check to see if a thread is in the window in contract_rele
	 * between dropping the reference count and removing the
	 * contract from the type AVL.
	 */
	mutex_enter(&ct->ct_reflock);
	if (ct->ct_ref) {
		ct->ct_ref++;
		mutex_exit(&ct->ct_reflock);
	} else {
		mutex_exit(&ct->ct_reflock);
		ct = NULL;
	}

	return (ct);
}

/*
 * contract_type_ptr
 *
 * Returns a pointer to the contract with the specified id.  The
 * contract is held, so the caller needs to release the reference when
 * it is through with the contract.
 */
contract_t *
contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
{
	contract_t *ct;

	mutex_enter(&type->ct_type_lock);
	ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
	mutex_exit(&type->ct_type_lock);

	return (ct);
}

/*
 * contract_ptr
 *
 * Returns a pointer to the contract with the specified id.  The
 * contract is held, so the caller needs to release the reference when
 * it is through with the contract.
 */
contract_t *
contract_ptr(ctid_t id, uint64_t zuniqid)
{
	contract_t *ct;

	mutex_enter(&contract_lock);
	ct = contract_ptr_common(&contract_avl, id, zuniqid);
	mutex_exit(&contract_lock);

	return (ct);
}

/*
 * contract_type_time
 *
 * Obtains the last time a contract of a particular type was created.
 */
void
contract_type_time(ct_type_t *type, timestruc_t *time)
{
	mutex_enter(&type->ct_type_lock);
	*time = type->ct_type_timestruc;
	mutex_exit(&type->ct_type_lock);
}

/*
 * contract_type_bundle
 *
 * Obtains a type's bundle queue.
 */
ct_equeue_t *
contract_type_bundle(ct_type_t *type)
{
	return (&type->ct_type_events);
}

/*
 * contract_type_pbundle
 *
 * Obtains a process's bundle queue.  If one doesn't exist, one is
 * created.  Often used simply to ensure that a bundle queue is
 * allocated.
 */
ct_equeue_t *
contract_type_pbundle(ct_type_t *type, proc_t *pp)
{
	/*
	 * If there isn't an array of bundle queues, allocate one.
	 */
	if (pp->p_ct_equeue == NULL) {
		size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
		ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);

		mutex_enter(&pp->p_lock);
		if (pp->p_ct_equeue)
			kmem_free(qa, size);
		else
			pp->p_ct_equeue = qa;
		mutex_exit(&pp->p_lock);
	}

	/*
	 * If there isn't a bundle queue of the required type, allocate
	 * one.
	 */
	if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
		ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
		cte_queue_create(q, CTEL_PBUNDLE, 20, 1);

		mutex_enter(&pp->p_lock);
		if (pp->p_ct_equeue[type->ct_type_index])
			cte_queue_drain(q, 0);
		else
			pp->p_ct_equeue[type->ct_type_index] = q;
		mutex_exit(&pp->p_lock);
	}

	return (pp->p_ct_equeue[type->ct_type_index]);
}

/*
 * ctparam_copyin
 *
 * copyin a ct_param_t for CT_TSET or CT_TGET commands.
 * If ctparam_copyout() is not called after ctparam_copyin(), then
 * the caller must kmem_free() the buffer pointed to by kparam->ctpm_kbuf.
 *
 * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
 * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
 * process lock.
 */
int
ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
{
	uint32_t size;
	void *ubuf;
	ct_param_t *param = &kparam->param;
	STRUCT_DECL(ct_param, uarg);

	STRUCT_INIT(uarg, flag);
	if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
		return (EFAULT);
	size = STRUCT_FGET(uarg, ctpm_size);
	ubuf = STRUCT_FGETP(uarg, ctpm_value);

	if (size > CT_PARAM_MAX_SIZE || size == 0)
		return (EINVAL);

	kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
	if (cmd == CT_TSET) {
		if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
			kmem_free(kparam->ctpm_kbuf, size);
			return (EFAULT);
		}
	}
	param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
	param->ctpm_size = size;
	param->ctpm_value = ubuf;
	kparam->ret_size = 0;

	return (0);
}

/*
 * ctparam_copyout
 *
 * copyout a ct_kparam_t and frees the buffer pointed to by the member
 * ctpm_kbuf of ct_kparam_t
 */
int
ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
{
	int r = 0;
	ct_param_t *param = &kparam->param;
	STRUCT_DECL(ct_param, uarg);

	STRUCT_INIT(uarg, flag);

	STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
	STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
	STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
	if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
		r = EFAULT;
		goto error;
	}
	if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
	    MIN(kparam->ret_size, param->ctpm_size))) {
		r = EFAULT;
	}

error:
	kmem_free(kparam->ctpm_kbuf, param->ctpm_size);

	return (r);
}

/*
 * ctmpl_free
 *
 * Frees a template.
 */
void
ctmpl_free(ct_template_t *template)
{
	mutex_destroy(&template->ctmpl_lock);
	template->ctmpl_ops->ctop_free(template);
}

/*
 * ctmpl_dup
 *
 * Creates a copy of a template.
 */
ct_template_t *
ctmpl_dup(ct_template_t *template)
{
	ct_template_t *new;

	if (template == NULL)
		return (NULL);

	new = template->ctmpl_ops->ctop_dup(template);
	/*
	 * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
	 * should have remained held until now.
	 */
	mutex_exit(&template->ctmpl_lock);

	return (new);
}

/*
 * ctmpl_set
 *
 * Sets the requested terms of a template.
 */
int
ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
{
	int result = 0;
	ct_param_t *param = &kparam->param;
	uint64_t param_value;

	if (param->ctpm_id == CTP_COOKIE ||
	    param->ctpm_id == CTP_EV_INFO ||
	    param->ctpm_id == CTP_EV_CRITICAL) {
		if (param->ctpm_size < sizeof (uint64_t)) {
			return (EINVAL);
		} else {
			param_value = *(uint64_t *)kparam->ctpm_kbuf;
		}
	}

	mutex_enter(&template->ctmpl_lock);
	switch (param->ctpm_id) {
	case CTP_COOKIE:
		template->ctmpl_cookie = param_value;
		break;
	case CTP_EV_INFO:
		if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
			result = EINVAL;
		else
			template->ctmpl_ev_info = param_value;
		break;
	case CTP_EV_CRITICAL:
		if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
			result = EINVAL;
			break;
		} else if ((~template->ctmpl_ev_crit & param_value) == 0) {
			/*
			 * Assume that a pure reduction of the critical
			 * set is allowed by the contract type.
			 */
			template->ctmpl_ev_crit = param_value;
			break;
		}
		/*
		 * There may be restrictions on what we can make
		 * critical, so we defer to the judgement of the
		 * contract type.
		 */
		/* FALLTHROUGH */
	default:
		result = template->ctmpl_ops->ctop_set(template, kparam, cr);
	}
	mutex_exit(&template->ctmpl_lock);

	return (result);
}

/*
 * ctmpl_get
 *
 * Obtains the requested terms from a template.
 *
 * If the term requested is a variable-sized term and the buffer
 * provided is too small for the data, we truncate the data and return
 * the buffer size necessary to fit the term in kparam->ret_size.  If the
 * term requested is fixed-size (uint64_t) and the buffer provided is too
 * small, we return EINVAL.  This should never happen if you're using
 * libcontract(3LIB), only if you call ioctl with a hand-constructed
 * ct_param_t argument.
 *
 * Currently, only contract specific parameters have variable-sized
 * parameters.
 */
int
ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
{
	int result = 0;
	ct_param_t *param = &kparam->param;
	uint64_t *param_value;

	if (param->ctpm_id == CTP_COOKIE ||
	    param->ctpm_id == CTP_EV_INFO ||
	    param->ctpm_id == CTP_EV_CRITICAL) {
		if (param->ctpm_size < sizeof (uint64_t)) {
			return (EINVAL);
		} else {
			param_value = kparam->ctpm_kbuf;
			kparam->ret_size = sizeof (uint64_t);
		}
	}

	mutex_enter(&template->ctmpl_lock);
	switch (param->ctpm_id) {
	case CTP_COOKIE:
		*param_value = template->ctmpl_cookie;
		break;
	case CTP_EV_INFO:
		*param_value = template->ctmpl_ev_info;
		break;
	case CTP_EV_CRITICAL:
		*param_value = template->ctmpl_ev_crit;
		break;
	default:
		result = template->ctmpl_ops->ctop_get(template, kparam);
	}
	mutex_exit(&template->ctmpl_lock);

	return (result);
}

/*
 * ctmpl_makecurrent
 *
 * Used by ctmpl_activate and ctmpl_clear to set the current thread's
 * active template.  Frees the old active template, if there was one.
 */
static void
ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
{
	klwp_t *curlwp = ttolwp(curthread);
	proc_t *p = curproc;
	ct_template_t *old;

	mutex_enter(&p->p_lock);
	old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
	curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
	mutex_exit(&p->p_lock);

	if (old)
		ctmpl_free(old);
}

/*
 * ctmpl_activate
 *
 * Copies the specified template as the current thread's active
 * template of that type.
 */
void
ctmpl_activate(ct_template_t *template)
{
	ctmpl_makecurrent(template, ctmpl_dup(template));
}

/*
 * ctmpl_clear
 *
 * Clears the current thread's active template of the same type as
 * the specified template.
 */
void
ctmpl_clear(ct_template_t *template)
{
	ctmpl_makecurrent(template, NULL);
}

/*
 * ctmpl_create
 *
 * Creates a new contract using the specified template.
 */
int
ctmpl_create(ct_template_t *template, ctid_t *ctidp)
{
	return (template->ctmpl_ops->ctop_create(template, ctidp));
}

/*
 * ctmpl_init
 *
 * Initializes the common portion of a new contract template.
 */
void
ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
{
	mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
	new->ctmpl_ops = ops;
	new->ctmpl_type = type;
	new->ctmpl_data = data;
	new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
	new->ctmpl_cookie = 0;
}

/*
 * ctmpl_copy
 *
 * Copies the common portions of a contract template.  Intended for use
 * by a contract type's ctop_dup template op.  Returns with the old
 * template's lock held, which should remain held until the
 * template op returns (it is dropped by ctmpl_dup).
 */
void
ctmpl_copy(ct_template_t *new, ct_template_t *old)
{
	mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&old->ctmpl_lock);
	new->ctmpl_ops = old->ctmpl_ops;
	new->ctmpl_type = old->ctmpl_type;
	new->ctmpl_ev_crit = old->ctmpl_ev_crit;
	new->ctmpl_ev_info = old->ctmpl_ev_info;
	new->ctmpl_cookie = old->ctmpl_cookie;
}

/*
 * ctmpl_create_inval
 *
 * Returns EINVAL.  Provided for the convenience of those contract
 * types which don't support ct_tmpl_create(3contract) and would
 * otherwise need to create their own stub for the ctop_create template
 * op.
 */
/*ARGSUSED*/
int
ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
{
	return (EINVAL);
}


/*
 * cte_queue_create
 *
 * Initializes a queue of a particular type.  If dynamic is set, the
 * queue is to be freed when its last listener is removed after being
 * drained.
 */
static void
cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
{
	mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
	q->ctq_listno = list;
	list_create(&q->ctq_events, sizeof (ct_kevent_t),
	    offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
	list_create(&q->ctq_listeners, sizeof (ct_listener_t),
	    offsetof(ct_listener_t, ctl_allnode));
	list_create(&q->ctq_tail, sizeof (ct_listener_t),
	    offsetof(ct_listener_t, ctl_tailnode));
	gethrestime(&q->ctq_atime);
	q->ctq_nlisteners = 0;
	q->ctq_nreliable = 0;
	q->ctq_ninf = 0;
	q->ctq_max = maxinf;

	/*
	 * Bundle queues and contract queues are embedded in other
	 * structures and are implicitly reference counted by virtue
	 * of their vnodes' indirect hold on their contracts.  Process
	 * bundle queues are dynamically allocated and may persist
	 * after the death of the process, so they must be explicitly
	 * reference counted.
	 */
	q->ctq_flags = dynamic ? CTQ_REFFED : 0;
}

/*
 * cte_queue_destroy
 *
 * Destroys the specified queue.  The queue is freed if it is
 * reference counted.
 */
static void
cte_queue_destroy(ct_equeue_t *q)
{
	ASSERT(q->ctq_flags & CTQ_DEAD);
	ASSERT(q->ctq_nlisteners == 0);
	ASSERT(q->ctq_nreliable == 0);
	list_destroy(&q->ctq_events);
	list_destroy(&q->ctq_listeners);
	list_destroy(&q->ctq_tail);
	mutex_destroy(&q->ctq_lock);
	if (q->ctq_flags & CTQ_REFFED)
		kmem_free(q, sizeof (ct_equeue_t));
}

/*
 * cte_hold
 *
 * Takes a hold on the specified event.
 */
static void
cte_hold(ct_kevent_t *e)
{
	mutex_enter(&e->cte_lock);
	ASSERT(e->cte_refs > 0);
	e->cte_refs++;
	mutex_exit(&e->cte_lock);
}

/*
 * cte_rele
 *
 * Releases a hold on the specified event.  If the caller had the last
 * reference, frees the event and releases its hold on the contract
 * that generated it.
 */
static void
cte_rele(ct_kevent_t *e)
{
	mutex_enter(&e->cte_lock);
	ASSERT(e->cte_refs > 0);
	if (--e->cte_refs) {
		mutex_exit(&e->cte_lock);
		return;
	}

	contract_rele(e->cte_contract);

	mutex_destroy(&e->cte_lock);
	nvlist_free(e->cte_data);
	nvlist_free(e->cte_gdata);
	kmem_free(e, sizeof (ct_kevent_t));
}

/*
 * cte_qrele
 *
 * Remove this listener's hold on the specified event, removing and
 * releasing the queue's hold on the event if appropriate.
 */
static void
cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
{
	ct_member_t *member = &e->cte_nodes[q->ctq_listno];

	ASSERT(MUTEX_HELD(&q->ctq_lock));

	if (l->ctl_flags & CTLF_RELIABLE)
		member->ctm_nreliable--;
	if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
		member->ctm_trimmed = 0;
		list_remove(&q->ctq_events, e);
		cte_rele(e);
	}
}

/*
 * cte_qmove
 *
 * Move this listener to the specified event in the queue.
 */
static ct_kevent_t *
cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
{
	ct_kevent_t *olde;

	ASSERT(MUTEX_HELD(&q->ctq_lock));
	ASSERT(l->ctl_equeue == q);

	if ((olde = l->ctl_position) == NULL)
		list_remove(&q->ctq_tail, l);

	while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
		e = list_next(&q->ctq_events, e);

	if (e != NULL) {
		e->cte_nodes[q->ctq_listno].ctm_refs++;
		if (l->ctl_flags & CTLF_RELIABLE)
			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
	} else {
		list_insert_tail(&q->ctq_tail, l);
	}

	l->ctl_position = e;
	if (olde)
		cte_qrele(q, l, olde);

	return (e);
}

/*
 * cte_checkcred
 *
 * Determines if the specified event's contract is owned by a process
 * with the same effective uid as the specified credential.  Called
 * after a failed call to contract_owned with locked set.  Because it
 * drops the queue lock, its caller (cte_qreadable) needs to make sure
 * we're still in the same place after we return.  Returns 1 on
 * success.
 */
static int
cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
{
	int result;
	contract_t *ct = e->cte_contract;

	cte_hold(e);
	mutex_exit(&q->ctq_lock);
	result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
	    contract_checkcred(ct, cr);
	mutex_enter(&q->ctq_lock);
	cte_rele(e);

	return (result);
}

/*
 * cte_qreadable
 *
 * Ensures that the listener is pointing to a valid event that the
 * caller has the credentials to read.  Returns 0 if we can read the
 * event we're pointing to.
 */
static int
cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
    uint64_t zuniqid, int crit)
{
	ct_kevent_t *e, *next;
	contract_t *ct;

	ASSERT(MUTEX_HELD(&q->ctq_lock));
	ASSERT(l->ctl_equeue == q);

	if (l->ctl_flags & CTLF_COPYOUT)
		return (1);

	next = l->ctl_position;
	while (e = cte_qmove(q, l, next)) {
		ct = e->cte_contract;
		/*
		 * Check obvious things first.  If we are looking for a
		 * critical message, is this one?  If we aren't in the
		 * global zone, is this message meant for us?
		 */
		if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
		    (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
		    zuniqid != contract_getzuniqid(ct))) {

			next = list_next(&q->ctq_events, e);

		/*
		 * Next, see if our effective uid equals that of owner
		 * or author of the contract.  Since we are holding the
		 * queue lock, contract_owned can't always check if we
		 * have the same effective uid as the contract's
		 * owner.  If it comes to that, it fails and we take
		 * the slow(er) path.
		 */
		} else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {

			/*
			 * At this point we either don't have any claim
			 * to this contract or we match the effective
			 * uid of the owner but couldn't tell.  We
			 * first test for a NULL holder so that events
			 * from orphans and inherited contracts avoid
			 * the penalty phase.
			 */
			if (e->cte_contract->ct_owner == NULL &&
			    !secpolicy_contract_observer_choice(cr))
				next = list_next(&q->ctq_events, e);

			/*
			 * cte_checkcred will juggle locks to see if we
			 * have the same uid as the event's contract's
			 * current owner.  If it succeeds, we have to

/*
 * cte_qreadable
 *
 * Ensures that the listener is pointing to a valid event that the
 * caller has the credentials to read.  Returns 0 if we can read the
 * event we're pointing to.
 */
static int
cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
    uint64_t zuniqid, int crit)
{
	ct_kevent_t *e, *next;
	contract_t *ct;

	ASSERT(MUTEX_HELD(&q->ctq_lock));
	ASSERT(l->ctl_equeue == q);

	if (l->ctl_flags & CTLF_COPYOUT)
		return (1);

	next = l->ctl_position;
	while (e = cte_qmove(q, l, next)) {
		ct = e->cte_contract;
		/*
		 * Check obvious things first.  If we are looking for a
		 * critical message, is this one?  If we aren't in the
		 * global zone, is this message meant for us?
		 */
		if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
		    (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
		    zuniqid != contract_getzuniqid(ct))) {

			next = list_next(&q->ctq_events, e);

			/*
			 * Next, see if our effective uid equals that of owner
			 * or author of the contract.  Since we are holding the
			 * queue lock, contract_owned can't always check if we
			 * have the same effective uid as the contract's
			 * owner.  If it comes to that, it fails and we take
			 * the slow(er) path.
			 */
		} else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {

			/*
			 * At this point we either don't have any claim
			 * to this contract or we match the effective
			 * uid of the owner but couldn't tell.  We
			 * first test for a NULL holder so that events
			 * from orphans and inherited contracts avoid
			 * the penalty phase.
			 */
			if (e->cte_contract->ct_owner == NULL &&
			    !secpolicy_contract_observer_choice(cr))
				next = list_next(&q->ctq_events, e);

			/*
			 * cte_checkcred will juggle locks to see if we
			 * have the same uid as the event's contract's
			 * current owner.  If it succeeds, we have to
			 * make sure we are in the same point in the
			 * queue.
			 */
			else if (cte_checkcred(q, e, cr) &&
			    l->ctl_position == e)
				break;

			/*
			 * cte_checkcred failed; see if we're in the
			 * same place.
			 */
			else if (l->ctl_position == e)
				if (secpolicy_contract_observer_choice(cr))
					break;
				else
					next = list_next(&q->ctq_events, e);

			/*
			 * cte_checkcred failed, and our position was
			 * changed.  Start from there.
			 */
			else
				next = l->ctl_position;
		} else {
			break;
		}
	}

	/*
	 * We check for CTLF_COPYOUT again in case we dropped the queue
	 * lock in cte_checkcred.
	 */
	return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
}

/*
 * cte_qwakeup
 *
 * Wakes up any waiting listeners and points them at the specified event.
 */
static void
cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
{
	ct_listener_t *l;

	ASSERT(MUTEX_HELD(&q->ctq_lock));

	while (l = list_head(&q->ctq_tail)) {
		list_remove(&q->ctq_tail, l);
		e->cte_nodes[q->ctq_listno].ctm_refs++;
		if (l->ctl_flags & CTLF_RELIABLE)
			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
		l->ctl_position = e;
		cv_signal(&l->ctl_cv);
		pollwakeup(&l->ctl_pollhead, POLLIN);
	}
}

/*
 * cte_copy
 *
 * Copies events from the specified contract event queue to the
 * end of the specified process bundle queue.  Only called from
 * contract_adopt.
 *
 * We copy to the end of the target queue instead of mixing the events
 * in their proper order because otherwise the act of adopting a
 * contract would require a process to reset all process bundle
 * listeners it needed to see the new events.  This would, in turn,
 * require the process to keep track of which preexisting events had
 * already been processed.
 */
static void
cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
{
	ct_kevent_t *e, *first = NULL;

	VERIFY(q->ctq_listno == CTEL_CONTRACT);
	VERIFY(newq->ctq_listno == CTEL_PBUNDLE);

	mutex_enter(&q->ctq_lock);
	mutex_enter(&newq->ctq_lock);

	/*
	 * For now, only copy critical events.
	 */
	for (e = list_head(&q->ctq_events); e != NULL;
	    e = list_next(&q->ctq_events, e)) {
		if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
			if (first == NULL)
				first = e;
			/*
			 * It is possible for adoption to race with an owner's
			 * cte_publish_all(); we must only enqueue events that
			 * have not already been enqueued.
			 */
			if (!list_link_active((list_node_t *)
			    ((uintptr_t)e + newq->ctq_events.list_offset))) {
				list_insert_tail(&newq->ctq_events, e);
				cte_hold(e);
			}
		}
	}

	mutex_exit(&q->ctq_lock);

	if (first)
		cte_qwakeup(newq, first);

	mutex_exit(&newq->ctq_lock);
}

/*
 * cte_trim
 *
 * Trims unneeded events from an event queue.  Algorithm works as
 * follows:
 *
 * Removes all informative and acknowledged critical events until the
 * first referenced event is found.
 *
 * If a contract is specified, removes all events (regardless of
 * acknowledgement) generated by that contract until the first event
 * referenced by a reliable listener is found.  Referenced events are
 * removed by marking them "trimmed".
 * Such events will be removed
 * when the last reference is dropped and will be skipped by future
 * listeners.
 *
 * This is pretty basic.  Ideally this should remove from the middle of
 * the list (i.e. beyond the first referenced event), and even
 * referenced events.
 */
static void
cte_trim(ct_equeue_t *q, contract_t *ct)
{
	ct_kevent_t *e, *next;
	int flags, stopper;
	int start = 1;

	VERIFY(MUTEX_HELD(&q->ctq_lock));

	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
		next = list_next(&q->ctq_events, e);
		flags = e->cte_flags;
		stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
		    (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
		if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
			if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
			    (e->cte_contract == ct)) {
				/*
				 * Toss informative and ACKed critical messages.
				 */
				list_remove(&q->ctq_events, e);
				cte_rele(e);
			}
		} else if ((e->cte_contract == ct) && !stopper) {
			ASSERT(q->ctq_nlisteners != 0);
			e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
		} else if (ct && !stopper) {
			start = 0;
		} else {
			/*
			 * Don't free messages past the first reader.
			 */
			break;
		}
	}
}

/*
 * cte_queue_drain
 *
 * Drain all events from the specified queue, and mark it dead.  If
 * "ack" is set, acknowledge any critical events we find along the
 * way.
 */
static void
cte_queue_drain(ct_equeue_t *q, int ack)
{
	ct_kevent_t *e, *next;
	ct_listener_t *l;

	mutex_enter(&q->ctq_lock);

	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
		next = list_next(&q->ctq_events, e);
		if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
			/*
			 * Make sure critical messages are eventually
			 * removed from the bundle queues.
			 */
			mutex_enter(&e->cte_lock);
			e->cte_flags |= CTE_ACK;
			mutex_exit(&e->cte_lock);
			ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
			e->cte_contract->ct_evcnt--;
		}
		list_remove(&q->ctq_events, e);
		e->cte_nodes[q->ctq_listno].ctm_refs = 0;
		e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
		e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
		cte_rele(e);
	}

	/*
	 * This is necessary only because of CTEL_PBUNDLE listeners;
	 * the events they point to can move from one pbundle to
	 * another.  Fortunately, this only happens if the contract is
	 * inherited, which (in turn) only happens if the process
	 * exits, which means it's an all-or-nothing deal.  If this
	 * wasn't the case, we would instead need to keep track of
	 * listeners on a per-event basis, not just a per-queue basis.
	 * This would have the side benefit of letting us clean up
	 * trimmed events sooner (i.e. immediately), but would
	 * unfortunately make events even bigger than they already
	 * are.
	 */
	for (l = list_head(&q->ctq_listeners); l;
	    l = list_next(&q->ctq_listeners, l)) {
		l->ctl_flags |= CTLF_DEAD;
		if (l->ctl_position) {
			l->ctl_position = NULL;
			list_insert_tail(&q->ctq_tail, l);
		}
		cv_broadcast(&l->ctl_cv);
	}

	/*
	 * Disallow events.
	 */
	q->ctq_flags |= CTQ_DEAD;

	/*
	 * If we represent the last reference to a reference counted
	 * process bundle queue, free it.
	 */
	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
		cte_queue_destroy(q);
	else
		mutex_exit(&q->ctq_lock);
}

/*
 * cte_publish
 *
 * Publishes an event to a specific queue.  Only called by
 * cte_publish_all.
 */
static void
cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
{
	ASSERT(MUTEX_HELD(&q->ctq_lock));

	q->ctq_atime = *tsp;

	/*
	 * If this event may already exist on this queue, check to see if it
	 * is already there and return if so.
	 */
	if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
	    q->ctq_events.list_offset))) {
		mutex_exit(&q->ctq_lock);
		cte_rele(e);
		return;
	}

	/*
	 * Don't publish if the event is informative and there aren't
	 * any listeners, or if the queue has been shut down.
	 */
	if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
	    (q->ctq_flags & CTQ_DEAD)) {
		mutex_exit(&q->ctq_lock);
		cte_rele(e);
		return;
	}

	/*
	 * Enqueue event
	 */
	VERIFY(!list_link_active((list_node_t *)
	    ((uintptr_t)e + q->ctq_events.list_offset)));
	list_insert_tail(&q->ctq_events, e);

	/*
	 * Check for waiting listeners
	 */
	cte_qwakeup(q, e);

	/*
	 * Trim unnecessary events from the queue.
	 */
	cte_trim(q, NULL);
	mutex_exit(&q->ctq_lock);
}

/*
 * cte_publish_all
 *
 * Publish an event to all necessary event queues.  The event, e, must
 * be zallocated by the caller, and the event's flags and type must be
 * set.  The rest of the event's fields are initialized here.
 */
uint64_t
cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
{
	ct_equeue_t *q;
	timespec_t ts;
	uint64_t evid;
	ct_kevent_t *negev;
	int negend;

	e->cte_contract = ct;
	e->cte_data = data;
	e->cte_gdata = gdata;
	e->cte_refs = 3;
	evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
	contract_hold(ct);

	/*
	 * For a negotiation event we set the ct->ct_nevent field of the
	 * contract for the duration of the negotiation.
	 */
	negend = 0;
	if (e->cte_flags & CTE_NEG) {
		cte_hold(e);
		ct->ct_nevent = e;
	} else if (e->cte_type == CT_EV_NEGEND) {
		negend = 1;
	}

	gethrestime(&ts);

	/*
	 * ct_evtlock simply (and only) ensures that two events sent
	 * from the same contract are delivered to all queues in the
	 * same order.
	 */
	mutex_enter(&ct->ct_evtlock);

	/*
	 * CTEL_CONTRACT - First deliver to the contract queue, acking
	 * the event if the contract has been orphaned.
	 */
	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);
	if ((e->cte_flags & CTE_INFO) == 0) {
		if (ct->ct_state >= CTS_ORPHAN)
			e->cte_flags |= CTE_ACK;
		else
			ct->ct_evcnt++;
	}
	mutex_exit(&ct->ct_lock);
	cte_publish(&ct->ct_events, e, &ts, B_FALSE);

	/*
	 * CTEL_BUNDLE - Next deliver to the contract type's bundle
	 * queue.
	 */
	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
	cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);

	/*
	 * CTEL_PBUNDLE - Finally, if the contract has an owner,
	 * deliver to the owner's process bundle queue.
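	 *
	 * Informally, the reference accounting works out as follows (this
	 * is a summary of the code, not new behavior): cte_refs was
	 * initialized to 3 above, one reference per delivery attempt.
	 * Each cte_publish() call takes ownership of one reference and
	 * either leaves the event on its queue (the reference becomes the
	 * queue's hold) or drops it with cte_rele().  The third reference
	 * is consumed below, by cte_publish() on the owner's process
	 * bundle queue or by an explicit cte_rele() if there is no owner.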
	 */
	mutex_enter(&ct->ct_lock);
	if (ct->ct_owner) {
		/*
		 * proc_exit doesn't free event queues until it has
		 * abandoned all contracts.
		 */
		ASSERT(ct->ct_owner->p_ct_equeue);
		ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
		q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
		mutex_enter(&q->ctq_lock);
		mutex_exit(&ct->ct_lock);

		/*
		 * It is possible for this code to race with adoption; we
		 * publish the event indicating that the event may already
		 * be enqueued because adoption beat us to it (in which case
		 * cte_publish() does nothing).
		 */
		cte_publish(q, e, &ts, B_TRUE);
	} else {
		mutex_exit(&ct->ct_lock);
		cte_rele(e);
	}

	if (negend) {
		mutex_enter(&ct->ct_lock);
		negev = ct->ct_nevent;
		ct->ct_nevent = NULL;
		cte_rele(negev);
		mutex_exit(&ct->ct_lock);
	}

	mutex_exit(&ct->ct_evtlock);

	return (evid);
}

/*
 * cte_add_listener
 *
 * Add a new listener to an event queue.
 */
void
cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
{
	cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
	l->ctl_equeue = q;
	l->ctl_position = NULL;
	l->ctl_flags = 0;

	mutex_enter(&q->ctq_lock);
	list_insert_head(&q->ctq_tail, l);
	list_insert_head(&q->ctq_listeners, l);
	q->ctq_nlisteners++;
	mutex_exit(&q->ctq_lock);
}

/*
 * cte_remove_listener
 *
 * Remove a listener from an event queue.  No other queue activities
 * (e.g. cte_get_event) may be in progress at this endpoint when this
 * is called.
 */
void
cte_remove_listener(ct_listener_t *l)
{
	ct_equeue_t *q = l->ctl_equeue;
	ct_kevent_t *e;

	mutex_enter(&q->ctq_lock);

	ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);

	if ((e = l->ctl_position) != NULL)
		cte_qrele(q, l, e);
	else
		list_remove(&q->ctq_tail, l);
	l->ctl_position = NULL;

	q->ctq_nlisteners--;
	list_remove(&q->ctq_listeners, l);

	if (l->ctl_flags & CTLF_RELIABLE)
		q->ctq_nreliable--;

	/*
	 * If we are the last listener of a dead reference counted
	 * queue (i.e. a process bundle) we free it.  Otherwise we just
	 * trim any events which may have been kept around for our
	 * benefit.
	 */
	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
	    (q->ctq_nlisteners == 0)) {
		cte_queue_destroy(q);
	} else {
		cte_trim(q, NULL);
		mutex_exit(&q->ctq_lock);
	}
}

/*
 * cte_reset_listener
 *
 * Moves a listener's queue pointer to the beginning of the queue.
 */
void
cte_reset_listener(ct_listener_t *l)
{
	ct_equeue_t *q = l->ctl_equeue;

	mutex_enter(&q->ctq_lock);

	/*
	 * We allow an asynchronous reset because it doesn't make a
	 * whole lot of sense to make reset block or fail.  We already
	 * have most of the mechanism needed thanks to queue trimming,
	 * so implementing it isn't a big deal.
	 */
	if (l->ctl_flags & CTLF_COPYOUT)
		l->ctl_flags |= CTLF_RESET;

	(void) cte_qmove(q, l, list_head(&q->ctq_events));

	/*
	 * Inform blocked readers.
	 */
	cv_broadcast(&l->ctl_cv);
	pollwakeup(&l->ctl_pollhead, POLLIN);
	mutex_exit(&q->ctq_lock);
}

/*
 * cte_next_event
 *
 * Moves the event pointer for the specified listener to the next event
 * on the queue.  To avoid races, this movement only occurs if the
 * specified event id matches that of the current event.  This is used
 * primarily to skip events that have been read but whose extended data
 * haven't been copied out.
 */
int
cte_next_event(ct_listener_t *l, uint64_t id)
{
	ct_equeue_t *q = l->ctl_equeue;
	ct_kevent_t *old;

	mutex_enter(&q->ctq_lock);

	if (l->ctl_flags & CTLF_COPYOUT)
		l->ctl_flags |= CTLF_RESET;

	if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
		(void) cte_qmove(q, l, list_next(&q->ctq_events, old));

	mutex_exit(&q->ctq_lock);

	return (0);
}

/*
 * cte_get_event
 *
 * Reads an event from an event endpoint.  If "nonblock" is clear, we
 * block until a suitable event is ready.  If "crit" is set, we only
 * read critical events.  Note that while "cr" is the caller's cred,
 * "zuniqid" is the unique id of the zone the calling contract
 * filesystem was mounted in.
 */
int
cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
    uint64_t zuniqid, int crit)
{
	ct_equeue_t *q = l->ctl_equeue;
	ct_kevent_t *temp;
	int result = 0;
	int partial = 0;
	size_t size, gsize, len;
	model_t mdl = get_udatamodel();
	STRUCT_DECL(ct_event, ev);
	STRUCT_INIT(ev, mdl);

	/*
	 * cte_qreadable checks for CTLF_COPYOUT as well as ensures
	 * that there exists, and we are pointing to, an appropriate
	 * event.  It may temporarily drop ctq_lock, but that doesn't
	 * really matter to us.
	 */
	mutex_enter(&q->ctq_lock);
	while (cte_qreadable(q, l, cr, zuniqid, crit)) {
		if (nonblock) {
			result = EAGAIN;
			goto error;
		}
		if (q->ctq_flags & CTQ_DEAD) {
			result = EIDRM;
			goto error;
		}
		result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
		if (result == 0) {
			result = EINTR;
			goto error;
		}
	}
	temp = l->ctl_position;
	cte_hold(temp);
	l->ctl_flags |= CTLF_COPYOUT;
	mutex_exit(&q->ctq_lock);

	/*
	 * We now have an event.  Copy in the user event structure to
	 * see how much space we have to work with.
	 */
	result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
	if (result)
		goto copyerr;

	/*
	 * Determine what data we have and what the user should be
	 * allowed to see.
	 */
	size = gsize = 0;
	if (temp->cte_data) {
		VERIFY(nvlist_size(temp->cte_data, &size,
		    NV_ENCODE_NATIVE) == 0);
		ASSERT(size != 0);
	}
	if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
		VERIFY(nvlist_size(temp->cte_gdata, &gsize,
		    NV_ENCODE_NATIVE) == 0);
		ASSERT(gsize != 0);
	}

	/*
	 * If we have enough space, copy out the extended event data.
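	 *
	 * As a layout note (summarizing the copyout below, not adding
	 * behavior), the packed contract-specific nvlist is placed first,
	 * followed by the packed global nvlist:
	 *
	 *	ctev_buffer:	[ cte_data: size bytes | cte_gdata: gsize ]
	 *	ctev_goffset:	size
	 *	ctev_nbytes:	size + gsize
	 *
	 * If the caller's buffer is too small, no payload is copied out,
	 * but ctev_nbytes is still set and the listener's position is not
	 * advanced, so the caller may retry with a larger buffer.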
	 */
	len = size + gsize;
	if (len) {
		if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
			char *buf = kmem_alloc(len, KM_SLEEP);

			if (size)
				VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
				    NV_ENCODE_NATIVE, KM_SLEEP) == 0);
			if (gsize) {
				char *tmp = buf + size;

				VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
				    &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
			}

			/* This shouldn't have changed */
			ASSERT(size + gsize == len);
			result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
			    len);
			kmem_free(buf, len);
			if (result)
				goto copyerr;
		} else {
			partial = 1;
		}
	}

	/*
	 * Copy out the common event data.
	 */
	STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
	STRUCT_FSET(ev, ctev_evid, temp->cte_id);
	STRUCT_FSET(ev, ctev_cttype,
	    temp->cte_contract->ct_type->ct_type_index);
	STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
	    (CTE_ACK|CTE_INFO|CTE_NEG));
	STRUCT_FSET(ev, ctev_type, temp->cte_type);
	STRUCT_FSET(ev, ctev_nbytes, len);
	STRUCT_FSET(ev, ctev_goffset, size);
	result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));

copyerr:
	/*
	 * Only move our location in the queue if all copyouts were
	 * successful, the caller provided enough space for the entire
	 * event, and our endpoint wasn't reset or otherwise moved by
	 * another thread.
	 */
	mutex_enter(&q->ctq_lock);
	if (result)
		result = EFAULT;
	else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
	    (l->ctl_position == temp))
		(void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
	l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
	/*
	 * Signal any readers blocked on our CTLF_COPYOUT.
	 */
	cv_signal(&l->ctl_cv);
	cte_rele(temp);

error:
	mutex_exit(&q->ctq_lock);
	return (result);
}

/*
 * cte_set_reliable
 *
 * Requests that events be reliably delivered to an event endpoint.
 * Unread informative and acknowledged critical events will not be
 * removed from the queue until this listener reads or skips them.
 * Because a listener could maliciously request reliable delivery and
 * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
 * caller's effective set.
 */
int
cte_set_reliable(ct_listener_t *l, const cred_t *cr)
{
	ct_equeue_t *q = l->ctl_equeue;
	int error;

	if ((error = secpolicy_contract_event(cr)) != 0)
		return (error);

	mutex_enter(&q->ctq_lock);
	if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
		l->ctl_flags |= CTLF_RELIABLE;
		q->ctq_nreliable++;
		if (l->ctl_position != NULL)
			l->ctl_position->cte_nodes[q->ctq_listno].
			    ctm_nreliable++;
	}
	mutex_exit(&q->ctq_lock);

	return (0);
}
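
/*
 * Listener life cycle, as an illustrative sketch only.  Endpoint
 * management lives in ctfs, so how the ct_listener_t is allocated or
 * embedded is up to the endpoint, and error handling is elided here:
 *
 *	cte_add_listener(q, l);			new listeners start on the
 *						tail list, positioned at the
 *						next event to arrive
 *	(void) cte_set_reliable(l, cr);		optional; requires
 *						PRIV_CONTRACT_EVENT
 *
 *	error = cte_get_event(l, nonblock, uaddr, cr, zuniqid, crit);
 *	error = cte_next_event(l, evid);	skip or acknowledge by id
 *	cte_reset_listener(l);			rewind to the oldest event
 *
 *	cte_remove_listener(l);			drops the position hold and
 *						may free a dead pbundle queue
 */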