1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 */
25
26 #include <sys/mutex.h>
27 #include <sys/debug.h>
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/kmem.h>
31 #include <sys/thread.h>
32 #include <sys/id_space.h>
33 #include <sys/avl.h>
34 #include <sys/list.h>
35 #include <sys/sysmacros.h>
36 #include <sys/proc.h>
37 #include <sys/contract.h>
38 #include <sys/contract_impl.h>
39 #include <sys/contract/device.h>
40 #include <sys/contract/device_impl.h>
41 #include <sys/cmn_err.h>
42 #include <sys/nvpair.h>
43 #include <sys/policy.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ddi_implfuncs.h>
46 #include <sys/systm.h>
47 #include <sys/stat.h>
48 #include <sys/sunddi.h>
49 #include <sys/esunddi.h>
50 #include <sys/ddi.h>
51 #include <sys/fs/dv_node.h>
52 #include <sys/sunndi.h>
53 #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */
54
55 /*
56 * Device Contracts
57 * -----------------
58 * This file contains the core code for the device contracts framework.
59 * A device contract is an agreement or a contract between a process and
60 * the kernel regarding the state of the device. A device contract may be
61 * created when a relationship is formed between a device and a process
62 * i.e. at open(2) time, or it may be created at some point after the device
63 * has been opened. A device contract once formed may be broken by either party.
64 * A device contract can be broken by the process by an explicit abandon of the
65 * contract or by an implicit abandon when the process exits. A device contract
66 * can be broken by the kernel either asynchronously (without negotiation) or
67 * synchronously (with negotiation). Exactly which happens depends on the device
68 * state transition. The following state diagram shows the transitions between
69 * device states. Only device state transitions currently supported by device
 * contracts are shown.
71 *
72 * <-- A -->
73 * /-----------------> DEGRADED
74 * | |
75 * | |
76 * | | S
77 * | | |
78 * | | v
79 * v S --> v
80 * ONLINE ------------> OFFLINE
81 *
82 *
83 * In the figure above, the arrows indicate the direction of transition. The
84 * letter S refers to transitions which are inherently synchronous i.e.
85 * require negotiation and the letter A indicates transitions which are
86 * asynchronous i.e. are done without contract negotiations. A good example
87 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
88 * transition cannot happen as long as there are consumers which have the
89 * device open. Thus some form of negotiation needs to happen between the
90 * consumers and the kernel to ensure that consumers either close devices
91 * or disallow the move to OFFLINE. Certain other transitions such as
92 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
93 * non-negotiable. A device that suffers a fault that degrades its
94 * capabilities will become degraded irrespective of what consumers it has,
95 * so a negotiation in this case is pointless.
96 *
97 * The following device states are currently defined for device contracts:
98 *
99 * CT_DEV_EV_ONLINE
100 * The device is online and functioning normally
101 * CT_DEV_EV_DEGRADED
102 * The device is online but is functioning in a degraded capacity
103 * CT_DEV_EV_OFFLINE
104 * The device is offline and is no longer configured
105 *
106 * A typical consumer of device contracts starts out with a contract
107 * template and adds terms to that template. These include the
108 * "acceptable set" (A-set) term, which is a bitset of device states which
109 * are guaranteed by the contract. If the device moves out of a state in
110 * the A-set, the contract is broken. The breaking of the contract can
111 * be asynchronous in which case a critical contract event is sent to the
112 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
114 * consumer and the kernel. The kernel does this by sending a critical
115 * event to the consumer with the CTE_NEG flag set indicating that this
116 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
118 * privileges, it can send a NACK message to the kernel which will block
119 * the device state change. To NACK a negotiable event, a process must
120 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
121 *
122 * Other terms include the "minor path" term, specified explicitly if the
123 * contract is not being created at open(2) time or specified implicitly
124 * if the contract is being created at open time via an activated template.
125 *
126 * A contract event is sent on any state change to which the contract
127 * owner has subscribed via the informative or critical event sets. Only
128 * critical events are guaranteed to be delivered. Since all device state
129 * changes are controlled by the kernel and cannot be arbitrarily generated
130 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
131 * need to be asserted in a process's effective set to designate an event as
132 * critical. To ensure privacy, a process must either have the same effective
133 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
134 * asserted in its effective set in order to observe device contract events
135 * off the device contract type specific endpoint.
136 *
137 * Yet another term available with device contracts is the "non-negotiable"
138 * term. This term is used to pre-specify a NACK to any contract negotiation.
139 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
141 * non-negotiable. In this case, the device contract framework assumes a
142 * NACK for any transition to OFFLINE and blocks the offline. If the A-set
143 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
144 * are NACKed but transitions to DEGRADE succeed.
145 *
146 * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
147 * happens just before the I/O framework attempts to offline a device
148 * (i.e. detach a device and set the offline flag so that it cannot be
149 * reattached). A device contract holder is expected to either NACK the offline
150 * (if privileged) or release the device and allow the offline to proceed.
151 *
152 * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
153 * is generated just before the I/O framework transitions the device state
154 * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
155 *
156 * The contract holder is expected to ACK or NACK a negotiation event
157 * within a certain period of time. If the ACK/NACK is not received
158 * within the timeout period, the device contract framework will behave
159 * as if the contract does not exist and will proceed with the event.
160 *
161 * Unlike a process contract a device contract does not need to exist
162 * once it is abandoned, since it does not define a fault boundary. It
163 * merely represents an agreement between a process and the kernel
164 * regarding the state of the device. Once the process has abandoned
165 * the contract (either implicitly via a process exit or explicitly)
166 * the kernel has no reason to retain the contract. As a result
167 * device contracts are neither inheritable nor need to exist in an
168 * orphan state.
169 *
170 * A device unlike a process may exist in multiple contracts and has
171 * a "life" outside a device contract. A device unlike a process
172 * may exist without an associated contract. Unlike a process contract
173 * a device contract may be formed after a binding relationship is
174 * formed between a process and a device.
175 *
176 * IMPLEMENTATION NOTES
177 * ====================
178 * DATA STRUCTURES
179 * ----------------
180 * The heart of the device contracts implementation is the device contract
181 * private cont_device_t (or ctd for short) data structure. It encapsulates
182 * the generic contract_t data structure and has a number of private
183 * fields.
184 * These include:
185 * cond_minor: The minor device that is the subject of the contract
186 * cond_aset: The bitset of states which are guaranteed by the
187 * contract
188 * cond_noneg: If set, indicates that the result of negotiation has
189 * been predefined to be a NACK
 * In addition, there are other device identifiers such as the devinfo node,
191 * dev_t and spec_type of the minor node. There are also a few fields that
192 * are used during negotiation to maintain state. See
193 * uts/common/sys/contract/device_impl.h
194 * for details.
195 * The ctd structure represents the device private part of a contract of
196 * type "device"
197 *
198 * Another data structure used by device contracts is ctmpl_device. It is
199 * the device contracts private part of the contract template structure. It
200 * encapsulates the generic template structure "ct_template_t" and includes
201 * the following device contract specific fields
202 * ctd_aset: The bitset of states that should be guaranteed by a
203 * contract
204 * ctd_noneg: If set, indicates that contract should NACK a
205 * negotiation
206 * ctd_minor: The devfs_path (without the /devices prefix) of the
207 * minor node that is the subject of the contract.
208 *
209 * ALGORITHMS
210 * ---------
211 * There are three sets of routines in this file
212 * Template related routines
213 * -------------------------
 * These routines provide support for template related operations initiated
215 * via the generic template operations. These include routines that dup
216 * a template, free it, and set various terms in the template
217 * (such as the minor node path, the acceptable state set (or A-set)
218 * and the non-negotiable term) as well as a routine to query the
219 * device specific portion of the template for the abovementioned terms.
220 * There is also a routine to create (ctmpl_device_create) that is used to
221 * create a contract from a template. This routine calls (after initial
222 * setup) the common function used to create a device contract
223 * (contract_device_create).
224 *
225 * core device contract implementation
226 * ----------------------------------
227 * These routines support the generic contract framework to provide
228 * functionality that allows contracts to be created, managed and
229 * destroyed. The contract_device_create() routine is a routine used
230 * to create a contract from a template (either via an explicit create
231 * operation on a template or implicitly via an open with an
232 * activated template.). The contract_device_free() routine assists
233 * in freeing the device contract specific parts. There are routines
234 * used to abandon (contract_device_abandon) a device contract as well
235 * as a routine to destroy (which despite its name does not destroy,
236 * it only moves a contract to a dead state) a contract.
237 * There is also a routine to return status information about a
238 * contract - the level of detail depends on what is requested by the
239 * user. A value of CTD_FIXED only returns fixed length fields such
240 * as the A-set, state of device and value of the "noneg" term. If
241 * CTD_ALL is specified, the minor node path is returned as well.
242 *
243 * In addition there are interfaces (contract_device_ack/nack) which
244 * are used to support negotiation between userland processes and
245 * device contracts. These interfaces record the acknowledgement
246 * or lack thereof for negotiation events and help determine if the
247 * negotiated event should occur.
248 *
249 * "backend routines"
250 * -----------------
251 * The backend routines form the interface between the I/O framework
252 * and the device contract subsystem. These routines, allow the I/O
253 * framework to call into the device contract subsystem to notify it of
254 * impending changes to a device state as well as to inform of the
255 * final disposition of such attempted state changes. Routines in this
256 * class include contract_device_offline() that indicates an attempt to
257 * offline a device, contract_device_degrade() that indicates that
258 * a device is moving to the degraded state and contract_device_negend()
259 * that is used by the I/O framework to inform the contracts subsystem of
260 * the final disposition of an attempted operation.
261 *
262 * SUMMARY
263 * -------
264 * A contract starts its life as a template. A process allocates a device
265 * contract template and sets various terms:
266 * The A-set
267 * The device minor node
268 * Critical and informative events
 * The noneg i.e. no negotiation term
270 * Setting of these terms in the template is done via the
271 * ctmpl_device_set() entry point in this file. A process can query a
272 * template to determine the terms already set in the template - this is
273 * facilitated by the ctmpl_device_get() routine.
274 *
275 * Once all the appropriate terms are set, the contract is instantiated via
276 * one of two methods
277 * - via an explicit create operation - this is facilitated by the
278 * ctmpl_device_create() entry point
279 * - synchronously with the open(2) system call - this is achieved via the
280 * contract_device_open() routine.
281 * The core work for both these above functions is done by
282 * contract_device_create()
283 *
284 * A contract once created can be queried for its status. Support for
285 * status info is provided by both the common contracts framework and by
286 * the "device" contract type. If the level of detail requested is
287 * CTD_COMMON, only the common contract framework data is used. Higher
288 * levels of detail result in calls to contract_device_status() to supply
289 * device contract type specific status information.
290 *
 * A contract once created may be abandoned either explicitly or implicitly.
292 * In either case, the contract_device_abandon() function is invoked. This
293 * function merely calls contract_destroy() which moves the contract to
294 * the DEAD state. The device contract portion of destroy processing is
295 * provided by contract_device_destroy() which merely disassociates the
296 * contract from its device devinfo node. A contract in the DEAD state is
 * not freed. It hangs around until all references to the contract are
298 * gone. When that happens, the contract is finally deallocated. The
299 * device contract specific portion of the free is done by
300 * contract_device_free() which finally frees the device contract specific
301 * data structure (cont_device_t).
302 *
303 * When a device undergoes a state change, the I/O framework calls the
304 * corresponding device contract entry point. For example, when a device
305 * is about to go OFFLINE, the routine contract_device_offline() is
306 * invoked. Similarly if a device moves to DEGRADED state, the routine
307 * contract_device_degrade() function is called. These functions call the
308 * core routine contract_device_publish(). This function determines via
309 * the function is_sync_neg() whether an event is a synchronous (i.e.
310 * negotiable) event or not. In the former case contract_device_publish()
311 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
312 * and/or NACKs from contract holders. In the latter case, it simply
313 * publishes the event and does not wait. In the negotiation case, ACKs or
314 * NACKs from userland consumers results in contract_device_ack_nack()
315 * being called where the result of the negotiation is recorded in the
316 * contract data structure. Once all outstanding contract owners have
317 * responded, the device contract code in wait_for_acks() determines the
 * final result of the negotiation. A single NACK overrides all other ACKs.
319 * If there is no NACK, then a single ACK will result in an overall ACK
320 * result. If there are no ACKs or NACKs, then the result CT_NONE is
321 * returned back to the I/O framework. Once the event is permitted or
322 * blocked, the I/O framework proceeds or aborts the state change. The
323 * I/O framework then calls contract_device_negend() with a result code
324 * indicating final disposition of the event. This call releases the
325 * barrier and other state associated with the previous negotiation,
326 * which permits the next event (if any) to come into the device contract
327 * framework.
328 *
329 * Finally, a device that has outstanding contracts may be removed from
330 * the system which results in its devinfo node being freed. The devinfo
331 * free routine in the I/O framework, calls into the device contract
332 * function - contract_device_remove_dip(). This routine, disassociates
 * the dip from all contracts associated with the devinfo node being freed,
334 * allowing the devinfo node to be freed.
335 *
336 * LOCKING
337 * ---------
338 * There are four sets of data that need to be protected by locks
339 *
340 * i) device contract specific portion of the contract template - This data
341 * is protected by the template lock ctmpl_lock.
342 *
343 * ii) device contract specific portion of the contract - This data is
344 * protected by the contract lock ct_lock
345 *
346 * iii) The linked list of contracts hanging off a devinfo node - This
347 * list is protected by the per-devinfo node lock devi_ct_lock
348 *
349 * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
350 * and devi_ct_count that controls state changes to a dip
351 *
352 * The template lock is independent in that none of the other locks in this
353 * file may be taken while holding the template lock (and vice versa).
354 *
355 * The remaining three locks have the following lock order
356 *
357 * devi_ct_lock -> ct_count barrier -> ct_lock
358 *
359 */
360
/*
 * Forward declaration of the common device contract creation routine. It is
 * called by ctmpl_device_create() before its definition appears in the file.
 * On failure it returns NULL and reports the reason through "errorp".
 */
static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
    int spec_type, proc_t *owner, int *errorp);

/*
 * barrier routines
 *
 * Forward declarations of the per-devinfo-node barrier helpers. All of them
 * take the devinfo node whose barrier they operate on.
 */
static void ct_barrier_acquire(dev_info_t *dip);
static void ct_barrier_release(dev_info_t *dip);
static int ct_barrier_held(dev_info_t *dip);
static int ct_barrier_empty(dev_info_t *dip);
static void ct_barrier_wait_for_release(dev_info_t *dip);
static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
static void ct_barrier_decr(dev_info_t *dip);
static void ct_barrier_incr(dev_info_t *dip);

/*
 * The contract type handle for device contracts. It is passed to ctmpl_init()
 * when the default template is built.
 * NOTE(review): presumably assigned when the device contract type is
 * registered; the assignment is not in this part of the file - confirm.
 */
ct_type_t *device_type;
375
/*
 * Macro predicates for determining when events should be sent and how.
 *
 * EVSENDP(ctd, flag)
 *	Non-zero when any event in "flag" is present in either the
 *	informative or the critical event set of the contract embedded
 *	in "ctd", i.e. when an event should be sent at all.
 *
 * EVINFOP(ctd, flag)
 *	True when no event in "flag" is in the critical event set of that
 *	contract, i.e. when the event is not a critical one.
 *
 * Both arguments are parenthesized in the expansion so that compound
 * expressions (for example "&ctd_struct" or "ev1 | ev2") are evaluated
 * with the intended precedence. Note that "ctd" is evaluated twice by
 * EVSENDP, so it must not have side effects.
 */
#define	EVSENDP(ctd, flag) \
	(((ctd)->cond_contract.ct_ev_info | \
	(ctd)->cond_contract.ct_ev_crit) & (flag))

#define	EVINFOP(ctd, flag) \
	(((ctd)->cond_contract.ct_ev_crit & (flag)) == 0)
384
/*
 * State transition table showing which transitions are synchronous and which
 * are not.
 *
 * Each entry describes one transition from state st_old to state st_new.
 * st_neg is 1 when the transition is synchronous (negotiated with contract
 * holders) and 0 when it is asynchronous.
 */
struct ct_dev_negtable {
	uint_t st_old;	/* state the device is leaving */
	uint_t st_new;	/* state the device is entering */
	uint_t st_neg;	/* 1 if the transition is negotiated, else 0 */
} ct_dev_negtable[] = {
	{CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1},
	{CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0},
	{CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0},
	{CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1},
	/* all-zero entry; presumably the end-of-table marker for lookups */
	{0}
};
400
401 /*
402 * Device contract template implementation
403 */
404
405 /*
406 * ctmpl_device_dup
407 *
408 * The device contract template dup entry point.
409 * This simply copies all the fields (generic as well as device contract
410 * specific) fields of the original.
411 */
412 static struct ct_template *
ctmpl_device_dup(struct ct_template * template)413 ctmpl_device_dup(struct ct_template *template)
414 {
415 ctmpl_device_t *new;
416 ctmpl_device_t *old = template->ctmpl_data;
417 char *buf;
418 char *minor;
419
420 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
421 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
422
423 /*
424 * copy generic fields.
425 * ctmpl_copy returns with old template lock held
426 */
427 ctmpl_copy(&new->ctd_ctmpl, template);
428
429 new->ctd_ctmpl.ctmpl_data = new;
430 new->ctd_aset = old->ctd_aset;
431 new->ctd_minor = NULL;
432 new->ctd_noneg = old->ctd_noneg;
433
434 if (old->ctd_minor) {
435 ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
436 bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
437 } else {
438 kmem_free(buf, MAXPATHLEN);
439 buf = NULL;
440 }
441
442 mutex_exit(&template->ctmpl_lock);
443 if (buf) {
444 minor = i_ddi_strdup(buf, KM_SLEEP);
445 kmem_free(buf, MAXPATHLEN);
446 buf = NULL;
447 } else {
448 minor = NULL;
449 }
450 mutex_enter(&template->ctmpl_lock);
451
452 if (minor) {
453 new->ctd_minor = minor;
454 }
455
456 ASSERT(buf == NULL);
457 return (&new->ctd_ctmpl);
458 }
459
460 /*
461 * ctmpl_device_free
462 *
463 * The device contract template free entry point. Just
464 * frees the template.
465 */
466 static void
ctmpl_device_free(struct ct_template * template)467 ctmpl_device_free(struct ct_template *template)
468 {
469 ctmpl_device_t *dtmpl = template->ctmpl_data;
470
471 if (dtmpl->ctd_minor)
472 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
473
474 kmem_free(dtmpl, sizeof (ctmpl_device_t));
475 }
476
/*
 * SAFE_EV is the set of events which a non-privileged process is
 * allowed to make critical. An unprivileged device contract owner has
 * no control over when a device changes state, so all device events
 * can be in the critical set.
 *
 * EXCESS tells us if "value", a critical event set, requires
 * additional privilege: it yields the bits of "value" that lie outside
 * SAFE_EV. Because SAFE_EV is CT_DEV_ALLEVENT, EXCESS currently
 * evaluates to 0 for any set made up only of device events.
 */
#define	SAFE_EV		(CT_DEV_ALLEVENT)
#define	EXCESS(value)	((value) & ~SAFE_EV)
489
490
491 /*
492 * ctmpl_device_set
493 *
494 * The device contract template set entry point. Sets various terms in the
495 * template. The non-negotiable term can only be set if the process has
496 * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
497 */
498 static int
ctmpl_device_set(struct ct_template * tmpl,ct_kparam_t * kparam,const cred_t * cr)499 ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam,
500 const cred_t *cr)
501 {
502 ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
503 ct_param_t *param = &kparam->param;
504 int error;
505 dev_info_t *dip;
506 int spec_type;
507 uint64_t param_value;
508 char *str_value;
509
510 ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
511
512 param_value = SAFE_EV;
513 if (param->ctpm_id == CTDP_MINOR) {
514 str_value = (char *)kparam->ctpm_kbuf;
515 str_value[param->ctpm_size - 1] = '\0';
516 } else {
517 if (param->ctpm_size < sizeof (uint64_t))
518 return (EINVAL);
519 param_value = *(uint64_t *)kparam->ctpm_kbuf;
520 }
521
522 switch (param->ctpm_id) {
523 case CTDP_ACCEPT:
524 if (param_value & ~CT_DEV_ALLEVENT)
525 return (EINVAL);
526 if (param_value == 0)
527 return (EINVAL);
528 if (param_value == CT_DEV_ALLEVENT)
529 return (EINVAL);
530
531 dtmpl->ctd_aset = param_value;
532 break;
533 case CTDP_NONEG:
534 if (param_value != CTDP_NONEG_SET &&
535 param_value != CTDP_NONEG_CLEAR)
536 return (EINVAL);
537
538 /*
539 * only privileged processes can designate a contract
540 * non-negotiatble.
541 */
542 if (param_value == CTDP_NONEG_SET &&
543 (error = secpolicy_sys_devices(cr)) != 0) {
544 return (error);
545 }
546
547 dtmpl->ctd_noneg = param_value;
548 break;
549
550 case CTDP_MINOR:
551 if (*str_value != '/' ||
552 strncmp(str_value, "/devices/",
553 strlen("/devices/")) == 0 ||
554 strstr(str_value, "../devices/") != NULL ||
555 strchr(str_value, ':') == NULL) {
556 return (EINVAL);
557 }
558
559 spec_type = 0;
560 dip = NULL;
561 if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) {
562 return (ERANGE);
563 }
564 ddi_release_devi(dip);
565
566 if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
567 return (EINVAL);
568 }
569
570 if (dtmpl->ctd_minor != NULL) {
571 kmem_free(dtmpl->ctd_minor,
572 strlen(dtmpl->ctd_minor) + 1);
573 }
574 dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP);
575 break;
576 case CTP_EV_CRITICAL:
577 /*
578 * Currently for device contracts, any event
579 * may be added to the critical set. We retain the
580 * following code however for future enhancements.
581 */
582 if (EXCESS(param_value) &&
583 (error = secpolicy_contract_event(cr)) != 0)
584 return (error);
585 tmpl->ctmpl_ev_crit = param_value;
586 break;
587 default:
588 return (EINVAL);
589 }
590
591 return (0);
592 }
593
594 /*
595 * ctmpl_device_get
596 *
597 * The device contract template get entry point. Simply fetches and
598 * returns the value of the requested term.
599 */
600 static int
ctmpl_device_get(struct ct_template * template,ct_kparam_t * kparam)601 ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam)
602 {
603 ctmpl_device_t *dtmpl = template->ctmpl_data;
604 ct_param_t *param = &kparam->param;
605 uint64_t *param_value = kparam->ctpm_kbuf;
606
607 ASSERT(MUTEX_HELD(&template->ctmpl_lock));
608
609 if (param->ctpm_id == CTDP_ACCEPT ||
610 param->ctpm_id == CTDP_NONEG) {
611 if (param->ctpm_size < sizeof (uint64_t))
612 return (EINVAL);
613 kparam->ret_size = sizeof (uint64_t);
614 }
615
616 switch (param->ctpm_id) {
617 case CTDP_ACCEPT:
618 *param_value = dtmpl->ctd_aset;
619 break;
620 case CTDP_NONEG:
621 *param_value = dtmpl->ctd_noneg;
622 break;
623 case CTDP_MINOR:
624 if (dtmpl->ctd_minor) {
625 kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf,
626 dtmpl->ctd_minor, param->ctpm_size);
627 kparam->ret_size++;
628 } else {
629 return (ENOENT);
630 }
631 break;
632 default:
633 return (EINVAL);
634 }
635
636 return (0);
637 }
638
639 /*
640 * Device contract type specific portion of creating a contract using
641 * a specified template
642 */
643 /*ARGSUSED*/
644 int
ctmpl_device_create(ct_template_t * template,ctid_t * ctidp)645 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
646 {
647 ctmpl_device_t *dtmpl;
648 char *buf;
649 dev_t dev;
650 int spec_type;
651 int error;
652 cont_device_t *ctd;
653
654 if (ctidp == NULL)
655 return (EINVAL);
656
657 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
658
659 dtmpl = template->ctmpl_data;
660
661 mutex_enter(&template->ctmpl_lock);
662 if (dtmpl->ctd_minor == NULL) {
663 /* incomplete template */
664 mutex_exit(&template->ctmpl_lock);
665 kmem_free(buf, MAXPATHLEN);
666 return (EINVAL);
667 } else {
668 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
669 bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
670 }
671 mutex_exit(&template->ctmpl_lock);
672
673 spec_type = 0;
674 dev = NODEV;
675 if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
676 dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
677 (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
678 CT_DEBUG((CE_WARN,
679 "tmpl_create: failed to find device: %s", buf));
680 kmem_free(buf, MAXPATHLEN);
681 return (ERANGE);
682 }
683 kmem_free(buf, MAXPATHLEN);
684
685 ctd = contract_device_create(template->ctmpl_data,
686 dev, spec_type, curproc, &error);
687
688 if (ctd == NULL) {
689 CT_DEBUG((CE_WARN, "Failed to create device contract for "
690 "process (%d) with device (devt = %lu, spec_type = %s)",
691 curproc->p_pid, dev,
692 spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
693 return (error);
694 }
695
696 mutex_enter(&ctd->cond_contract.ct_lock);
697 *ctidp = ctd->cond_contract.ct_id;
698 mutex_exit(&ctd->cond_contract.ct_lock);
699
700 return (0);
701 }
702
/*
 * Device contract specific template entry points
 *
 * This operations vector is handed to ctmpl_init() by
 * contract_device_default() when it builds the default device contract
 * template. The initializer is positional; the trailing comments name the
 * slot each entry fills.
 */
static ctmplops_t ctmpl_device_ops = {
	ctmpl_device_dup,		/* ctop_dup */
	ctmpl_device_free,		/* ctop_free */
	ctmpl_device_set,		/* ctop_set */
	ctmpl_device_get,		/* ctop_get */
	ctmpl_device_create,		/* ctop_create */
	CT_DEV_ALLEVENT			/* all device events bitmask */
};
714
715
716 /*
717 * Device contract implementation
718 */
719
720 /*
721 * contract_device_default
722 *
723 * The device contract default template entry point. Creates a
724 * device contract template with a default A-set and no "noneg" ,
725 * with informative degrade events and critical offline events.
726 * There is no default minor path.
727 */
728 static ct_template_t *
contract_device_default(void)729 contract_device_default(void)
730 {
731 ctmpl_device_t *new;
732
733 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
734 ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);
735
736 new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
737 new->ctd_noneg = 0;
738 new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
739 new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
740
741 return (&new->ctd_ctmpl);
742 }
743
/*
 * contract_device_free
 *
 * Destroys the device contract specific portion of a contract and
 * frees the contract.
 *
 * ct: the generic contract; its ct_data points at the cont_device_t that
 * is released here.
 *
 * Apart from the two kmem_free() calls the body consists of ASSERTs that
 * check the device contract fields are in a consistent state at free time.
 */
static void
contract_device_free(contract_t *ct)
{
	cont_device_t *ctd = ct->ct_data;

	/* a minor path must be present; it is freed as string length + NUL */
	ASSERT(ctd->cond_minor);
	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
	kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);

	/* the dev_t must be a specific device, not a wildcard or NODEV */
	ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
	    ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);

	/* only block and character minor nodes are expected */
	ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);

	/* the A-set holds only device events; noneg is a 0/1 flag */
	ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
	ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);

	/* current event type and ack state use only their defined bits */
	ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
	ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));

	/*
	 * An event id and an event type go together: exactly one of
	 * "id is set" and "type is zero" holds. Without an event id no
	 * ACK/NACK may be recorded.
	 */
	ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
	ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));

	/* the contract must no longer be on a devinfo node's contract list */
	ASSERT(!list_link_active(&ctd->cond_next));

	kmem_free(ctd, sizeof (cont_device_t));
}
777
/*
 * contract_device_abandon
 *
 * The device contract abandon entry point.
 *
 * Must be called with the contract's ct_lock held. Abandoning a device
 * contract consists solely of handing it to contract_destroy().
 */
static void
contract_device_abandon(contract_t *ct)
{
	ASSERT(MUTEX_HELD(&ct->ct_lock));

	/*
	 * device contracts cannot be inherited or orphaned.
	 * Move the contract to the DEAD_STATE. It will be freed
	 * once all references to it are gone.
	 */
	contract_destroy(ct);
}
795
/*
 * contract_device_destroy
 *
 * The device contract destroy entry point.
 * Called from contract_destroy() to do any type specific destroy. Note
 * that destroy is a misnomer - this does not free the contract, it only
 * moves it to the dead state. A contract is actually freed via
 * contract_rele() -> contract_dtor(), contop_free()
 *
 * The type specific work is to unlink the contract from its devinfo node:
 * the contract is removed from the node's devi_ct list, cond_dip is
 * cleared and the hold taken for the dip linkage is released. If the
 * contract has no devinfo node any more, nothing is done.
 *
 * Called with ct_lock held and returns with ct_lock held; the lock is
 * dropped and re-taken several times in between.
 */
static void
contract_device_destroy(contract_t *ct)
{
	cont_device_t *ctd;
	dev_info_t *dip;

	ASSERT(MUTEX_HELD(&ct->ct_lock));

	/*
	 * Loop until devi_ct_lock is held together with ct_lock, or until
	 * the contract turns out to have no devinfo node. cond_dip is
	 * re-read on every pass because ct_lock is dropped between attempts.
	 */
	for (;;) {
		ctd = ct->ct_data;
		dip = ctd->cond_dip;
		if (dip == NULL) {
			/*
			 * The dip has been removed, this is a dangling contract
			 * Check that dip linkages are NULL
			 */
			ASSERT(!list_link_active(&ctd->cond_next));
			CT_DEBUG((CE_NOTE, "contract_device_destroy:"
			    " contract has no devinfo node. contract ctid : %d",
			    ct->ct_id));
			return;
		}

		/*
		 * The intended lock order is : devi_ct_lock -> ct_count
		 * barrier -> ct_lock.
		 * However we can't do this here as dropping the ct_lock allows
		 * a race condition with i_ddi_free_node()/
		 * contract_device_remove_dip() which may free off dip before
		 * we can take devi_ct_lock. So use mutex_tryenter to avoid
		 * dropping ct_lock until we have acquired devi_ct_lock.
		 */
		if (mutex_tryenter(&(DEVI(dip)->devi_ct_lock)) != 0)
			break;
		/* lock is busy: back off without ct_lock, then try again */
		mutex_exit(&ct->ct_lock);
		delay(drv_usectohz(1000));
		mutex_enter(&ct->ct_lock);
	}
	/*
	 * devi_ct_lock is now held. ct_lock is dropped before waiting on the
	 * barrier and is re-taken once the wait is over.
	 */
	mutex_exit(&ct->ct_lock);

	/*
	 * Waiting for the barrier to be released is strictly speaking not
	 * necessary. But it simplifies the implementation of
	 * contract_device_publish() by establishing the invariant that
	 * device contracts cannot go away during negotiation.
	 */
	ct_barrier_wait_for_release(dip);
	mutex_enter(&ct->ct_lock);

	/* unlink from the devinfo node with both locks held */
	list_remove(&(DEVI(dip)->devi_ct), ctd);
	ctd->cond_dip = NULL; /* no longer linked to dip */
	contract_rele(ct); /* remove hold for dip linkage */

	/*
	 * ct_lock is released before devi_ct_lock and then taken again, so
	 * that the function returns holding only ct_lock, as on entry.
	 */
	mutex_exit(&ct->ct_lock);
	mutex_exit(&(DEVI(dip)->devi_ct_lock));
	mutex_enter(&ct->ct_lock);
}
862
863 /*
864 * contract_device_status
865 *
866 * The device contract status entry point. Called when level of "detail"
867 * is either CTD_FIXED or CTD_ALL
868 *
869 */
870 static void
contract_device_status(contract_t * ct,zone_t * zone,int detail,nvlist_t * nvl,void * status,model_t model)871 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
872 void *status, model_t model)
873 {
874 cont_device_t *ctd = ct->ct_data;
875
876 ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
877
878 mutex_enter(&ct->ct_lock);
879 contract_status_common(ct, zone, status, model);
880
881 /*
882 * There's no need to hold the contract lock while accessing static
883 * data like aset or noneg. But since we need the lock to access other
884 * data like state, we hold it anyway.
885 */
886 VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
887 VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
888 VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
889
890 if (detail == CTD_FIXED) {
891 mutex_exit(&ct->ct_lock);
892 return;
893 }
894
895 ASSERT(ctd->cond_minor);
896 VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);
897
898 mutex_exit(&ct->ct_lock);
899 }
900
901 /*
902 * Converts a result integer into the corresponding string. Used for printing
903 * messages
904 */
905 static char *
result_str(uint_t result)906 result_str(uint_t result)
907 {
908 switch (result) {
909 case CT_ACK:
910 return ("CT_ACK");
911 case CT_NACK:
912 return ("CT_NACK");
913 case CT_NONE:
914 return ("CT_NONE");
915 default:
916 return ("UNKNOWN");
917 }
918 }
919
920 /*
921 * Converts a device state integer constant into the corresponding string.
922 * Used to print messages.
923 */
924 static char *
state_str(uint_t state)925 state_str(uint_t state)
926 {
927 switch (state) {
928 case CT_DEV_EV_ONLINE:
929 return ("ONLINE");
930 case CT_DEV_EV_DEGRADED:
931 return ("DEGRADED");
932 case CT_DEV_EV_OFFLINE:
933 return ("OFFLINE");
934 default:
935 return ("UNKNOWN");
936 }
937 }
938
939 /*
940 * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
941 * synchronous state change or not.
942 */
943 static int
is_sync_neg(uint_t old,uint_t new)944 is_sync_neg(uint_t old, uint_t new)
945 {
946 int i;
947
948 ASSERT(old & CT_DEV_ALLEVENT);
949 ASSERT(new & CT_DEV_ALLEVENT);
950
951 if (old == new) {
952 CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
953 state_str(new)));
954 return (-2);
955 }
956
957 for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
958 if (old == ct_dev_negtable[i].st_old &&
959 new == ct_dev_negtable[i].st_new) {
960 return (ct_dev_negtable[i].st_neg);
961 }
962 }
963
964 CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
965 "old = %s -> new = %s", state_str(old), state_str(new)));
966
967 return (-1);
968 }
969
970 /*
971 * Used to cleanup cached dv_nodes so that when a device is released by
972 * a contract holder, its devinfo node can be successfully detached.
973 */
974 static int
contract_device_dvclean(dev_info_t * dip)975 contract_device_dvclean(dev_info_t *dip)
976 {
977 char *devnm;
978 dev_info_t *pdip;
979
980 ASSERT(dip);
981
982 /* pdip can be NULL if we have contracts against the root dip */
983 pdip = ddi_get_parent(dip);
984
985 if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
986 char *path;
987
988 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
989 (void) ddi_pathname(dip, path);
990 CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
991 "device=%s", path));
992 kmem_free(path, MAXPATHLEN);
993 return (EDEADLOCK);
994 }
995
996 if (pdip) {
997 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
998 (void) ddi_deviname(dip, devnm);
999 (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
1000 kmem_free(devnm, MAXNAMELEN + 1);
1001 } else {
1002 (void) devfs_clean(dip, NULL, DV_CLEAN_FORCE);
1003 }
1004
1005 return (0);
1006 }
1007
1008 /*
1009 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
1010 * Results in the ACK or NACK being recorded on the dip for one particular
1011 * contract. The device contracts framework evaluates the ACK/NACKs for all
1012 * contracts against a device to determine if a particular device state change
1013 * should be allowed.
1014 */
1015 static int
contract_device_ack_nack(contract_t * ct,uint_t evtype,uint64_t evid,uint_t cmd)1016 contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
1017 uint_t cmd)
1018 {
1019 cont_device_t *ctd = ct->ct_data;
1020 dev_info_t *dip;
1021 ctid_t ctid;
1022 int error;
1023
1024 ctid = ct->ct_id;
1025
1026 CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
1027
1028 mutex_enter(&ct->ct_lock);
1029 CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
1030
1031 dip = ctd->cond_dip;
1032
1033 ASSERT(ctd->cond_minor);
1034 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
1035
1036 /*
1037 * Negotiation only if new state is not in A-set
1038 */
1039 ASSERT(!(ctd->cond_aset & evtype));
1040
1041 /*
1042 * Negotiation only if transition is synchronous
1043 */
1044 ASSERT(is_sync_neg(ctd->cond_state, evtype));
1045
1046 /*
1047 * We shouldn't be negotiating if the "noneg" flag is set
1048 */
1049 ASSERT(!ctd->cond_noneg);
1050
1051 if (dip)
1052 ndi_hold_devi(dip);
1053
1054 mutex_exit(&ct->ct_lock);
1055
1056 /*
1057 * dv_clean only if !NACK and offline state change
1058 */
1059 if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
1060 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
1061 error = contract_device_dvclean(dip);
1062 if (error != 0) {
1063 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
1064 ctid));
1065 ddi_release_devi(dip);
1066 }
1067 }
1068
1069 mutex_enter(&ct->ct_lock);
1070
1071 if (dip)
1072 ddi_release_devi(dip);
1073
1074 if (dip == NULL) {
1075 if (ctd->cond_currev_id != evid) {
1076 CT_DEBUG((CE_WARN, "%sACK for non-current event "
1077 "(type=%s, id=%llu) on removed device",
1078 cmd == CT_NACK ? "N" : "",
1079 state_str(evtype), (unsigned long long)evid));
1080 CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
1081 ctid));
1082 } else {
1083 ASSERT(ctd->cond_currev_type == evtype);
1084 CT_DEBUG((CE_WARN, "contract_ack: no such device: "
1085 "ctid: %d", ctid));
1086 }
1087 error = (ct->ct_state == CTS_DEAD) ? ESRCH :
1088 ((cmd == CT_NACK) ? ETIMEDOUT : 0);
1089 mutex_exit(&ct->ct_lock);
1090 return (error);
1091 }
1092
1093 /*
1094 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock
1095 */
1096 mutex_exit(&ct->ct_lock);
1097
1098 mutex_enter(&DEVI(dip)->devi_ct_lock);
1099 mutex_enter(&ct->ct_lock);
1100 if (ctd->cond_currev_id != evid) {
1101 char *buf;
1102 mutex_exit(&ct->ct_lock);
1103 mutex_exit(&DEVI(dip)->devi_ct_lock);
1104 ndi_hold_devi(dip);
1105 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1106 (void) ddi_pathname(dip, buf);
1107 ddi_release_devi(dip);
1108 CT_DEBUG((CE_WARN, "%sACK for non-current event"
1109 "(type=%s, id=%llu) on device %s",
1110 cmd == CT_NACK ? "N" : "",
1111 state_str(evtype), (unsigned long long)evid, buf));
1112 kmem_free(buf, MAXPATHLEN);
1113 CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
1114 cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
1115 return (cmd == CT_ACK ? 0 : ETIMEDOUT);
1116 }
1117
1118 ASSERT(ctd->cond_currev_type == evtype);
1119 ASSERT(cmd == CT_ACK || cmd == CT_NACK);
1120
1121 CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
1122 cmd == CT_NACK ? "N" : "", ctid));
1123
1124 ctd->cond_currev_ack = cmd;
1125 mutex_exit(&ct->ct_lock);
1126
1127 ct_barrier_decr(dip);
1128 mutex_exit(&DEVI(dip)->devi_ct_lock);
1129
1130 CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
1131
1132 return (0);
1133 }
1134
1135 /*
1136 * Invoked when a userland contract holder approves (i.e. ACKs) a state change
1137 */
1138 static int
contract_device_ack(contract_t * ct,uint_t evtype,uint64_t evid)1139 contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
1140 {
1141 return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
1142 }
1143
1144 /*
1145 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
1146 */
1147 static int
contract_device_nack(contract_t * ct,uint_t evtype,uint64_t evid)1148 contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
1149 {
1150 return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
1151 }
1152
1153 /*
1154 * Creates a new contract synchronously with the breaking of an existing
1155 * contract. Currently not supported.
1156 */
1157 /*ARGSUSED*/
1158 static int
contract_device_newct(contract_t * ct)1159 contract_device_newct(contract_t *ct)
1160 {
1161 return (ENOTSUP);
1162 }
1163
1164 /*
1165 * Core device contract implementation entry points
1166 */
1167 static contops_t contract_device_ops = {
1168 contract_device_free, /* contop_free */
1169 contract_device_abandon, /* contop_abandon */
1170 contract_device_destroy, /* contop_destroy */
1171 contract_device_status, /* contop_status */
1172 contract_device_ack, /* contop_ack */
1173 contract_device_nack, /* contop_nack */
1174 contract_qack_notsup, /* contop_qack */
1175 contract_device_newct /* contop_newct */
1176 };
1177
1178 /*
1179 * contract_device_init
1180 *
1181 * Initializes the device contract type.
1182 */
1183 void
contract_device_init(void)1184 contract_device_init(void)
1185 {
1186 device_type = contract_type_init(CTT_DEVICE, "device",
1187 &contract_device_ops, contract_device_default);
1188 }
1189
1190 /*
1191 * contract_device_create
1192 *
1193 * create a device contract given template "tmpl" and the "owner" process.
1194 * May fail and return NULL if project.max-contracts would have been exceeded.
1195 *
1196 * Common device contract creation routine called for both open-time and
1197 * non-open time device contract creation
1198 */
1199 static cont_device_t *
contract_device_create(ctmpl_device_t * dtmpl,dev_t dev,int spec_type,proc_t * owner,int * errorp)1200 contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
1201 proc_t *owner, int *errorp)
1202 {
1203 cont_device_t *ctd;
1204 char *minor;
1205 char *path;
1206 dev_info_t *dip;
1207
1208 ASSERT(dtmpl != NULL);
1209 ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
1210 ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
1211 ASSERT(errorp);
1212
1213 *errorp = 0;
1214
1215 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1216
1217 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
1218 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
1219 bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
1220 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
1221
1222 dip = e_ddi_hold_devi_by_path(path, 0);
1223 if (dip == NULL) {
1224 cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
1225 "for device path (%s)", path);
1226 kmem_free(path, MAXPATHLEN);
1227 *errorp = ERANGE;
1228 return (NULL);
1229 }
1230
1231 /*
1232 * Lock out any parallel contract negotiations
1233 */
1234 mutex_enter(&(DEVI(dip)->devi_ct_lock));
1235 ct_barrier_acquire(dip);
1236 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1237
1238 minor = i_ddi_strdup(path, KM_SLEEP);
1239 kmem_free(path, MAXPATHLEN);
1240
1241 (void) contract_type_pbundle(device_type, owner);
1242
1243 ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);
1244
1245 /*
1246 * Only we hold a refernce to this contract. Safe to access
1247 * the fields without a ct_lock
1248 */
1249 ctd->cond_minor = minor;
1250 /*
1251 * It is safe to set the dip pointer in the contract
1252 * as the contract will always be destroyed before the dip
1253 * is released
1254 */
1255 ctd->cond_dip = dip;
1256 ctd->cond_devt = dev;
1257 ctd->cond_spec = spec_type;
1258
1259 /*
1260 * Since we are able to lookup the device, it is either
1261 * online or degraded
1262 */
1263 ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
1264 CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;
1265
1266 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
1267 ctd->cond_aset = dtmpl->ctd_aset;
1268 ctd->cond_noneg = dtmpl->ctd_noneg;
1269
1270 /*
1271 * contract_ctor() initailizes the common portion of a contract
1272 * contract_dtor() destroys the common portion of a contract
1273 */
1274 if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
1275 ctd, 0, owner, B_TRUE)) {
1276 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
1277 /*
1278 * contract_device_free() destroys the type specific
1279 * portion of a contract and frees the contract.
1280 * The "minor" path and "cred" is a part of the type specific
1281 * portion of the contract and will be freed by
1282 * contract_device_free()
1283 */
1284 contract_device_free(&ctd->cond_contract);
1285
1286 /* release barrier */
1287 mutex_enter(&(DEVI(dip)->devi_ct_lock));
1288 ct_barrier_release(dip);
1289 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1290
1291 ddi_release_devi(dip);
1292 *errorp = EAGAIN;
1293 return (NULL);
1294 }
1295 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
1296
1297 mutex_enter(&ctd->cond_contract.ct_lock);
1298 ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
1299 ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
1300 ctd->cond_contract.ct_ntime.ctm_start = -1;
1301 ctd->cond_contract.ct_qtime.ctm_start = -1;
1302 mutex_exit(&ctd->cond_contract.ct_lock);
1303
1304 /*
1305 * Insert device contract into list hanging off the dip
1306 * Bump up the ref-count on the contract to reflect this
1307 */
1308 contract_hold(&ctd->cond_contract);
1309 mutex_enter(&(DEVI(dip)->devi_ct_lock));
1310 list_insert_tail(&(DEVI(dip)->devi_ct), ctd);
1311
1312 /* release barrier */
1313 ct_barrier_release(dip);
1314 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1315
1316 ddi_release_devi(dip);
1317
1318 return (ctd);
1319 }
1320
1321 /*
1322 * Called when a device is successfully opened to create an open-time contract
1323 * i.e. synchronously with a device open.
1324 */
1325 int
contract_device_open(dev_t dev,int spec_type,contract_t ** ctpp)1326 contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
1327 {
1328 ctmpl_device_t *dtmpl;
1329 ct_template_t *tmpl;
1330 cont_device_t *ctd;
1331 char *path;
1332 klwp_t *lwp;
1333 int error;
1334
1335 if (ctpp)
1336 *ctpp = NULL;
1337
1338 /*
1339 * Check if we are in user-context i.e. if we have an lwp
1340 */
1341 lwp = ttolwp(curthread);
1342 if (lwp == NULL) {
1343 CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
1344 return (0);
1345 }
1346
1347 tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
1348 if (tmpl == NULL) {
1349 return (0);
1350 }
1351 dtmpl = tmpl->ctmpl_data;
1352
1353 /*
1354 * If the user set a minor path in the template before an open,
1355 * ignore it. We use the minor path of the actual minor opened.
1356 */
1357 mutex_enter(&tmpl->ctmpl_lock);
1358 if (dtmpl->ctd_minor != NULL) {
1359 CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
1360 "ignoring device minor path in active template: %s",
1361 curproc->p_pid, dtmpl->ctd_minor));
1362 /*
1363 * This is a copy of the actual activated template.
1364 * Safe to make changes such as freeing the minor
1365 * path in the template.
1366 */
1367 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
1368 dtmpl->ctd_minor = NULL;
1369 }
1370 mutex_exit(&tmpl->ctmpl_lock);
1371
1372 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1373
1374 if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
1375 CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
1376 "minor path from dev_t,spec {%lu, %d} for process (%d)",
1377 dev, spec_type, curproc->p_pid));
1378 ctmpl_free(tmpl);
1379 kmem_free(path, MAXPATHLEN);
1380 return (1);
1381 }
1382
1383 mutex_enter(&tmpl->ctmpl_lock);
1384 ASSERT(dtmpl->ctd_minor == NULL);
1385 dtmpl->ctd_minor = path;
1386 mutex_exit(&tmpl->ctmpl_lock);
1387
1388 ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
1389
1390 mutex_enter(&tmpl->ctmpl_lock);
1391 ASSERT(dtmpl->ctd_minor);
1392 dtmpl->ctd_minor = NULL;
1393 mutex_exit(&tmpl->ctmpl_lock);
1394 ctmpl_free(tmpl);
1395 kmem_free(path, MAXPATHLEN);
1396
1397 if (ctd == NULL) {
1398 cmn_err(CE_NOTE, "contract_device_open(): Failed to "
1399 "create device contract for process (%d) holding "
1400 "device (devt = %lu, spec_type = %d)",
1401 curproc->p_pid, dev, spec_type);
1402 return (1);
1403 }
1404
1405 if (ctpp) {
1406 mutex_enter(&ctd->cond_contract.ct_lock);
1407 *ctpp = &ctd->cond_contract;
1408 mutex_exit(&ctd->cond_contract.ct_lock);
1409 }
1410 return (0);
1411 }
1412
1413 /*
1414 * Called during contract negotiation by the device contract framework to wait
1415 * for ACKs or NACKs from contract holders. If all responses are not received
1416 * before a specified timeout, this routine times out.
1417 */
1418 static uint_t
wait_for_acks(dev_info_t * dip,dev_t dev,int spec_type,uint_t evtype)1419 wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
1420 {
1421 cont_device_t *ctd;
1422 int timed_out = 0;
1423 int result = CT_NONE;
1424 int ack;
1425 char *f = "wait_for_acks";
1426
1427 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
1428 ASSERT(dip);
1429 ASSERT(evtype & CT_DEV_ALLEVENT);
1430 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
1431 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
1432 (spec_type == S_IFBLK || spec_type == S_IFCHR));
1433
1434 CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
1435
1436 if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
1437 /*
1438 * some contract owner(s) didn't respond in time
1439 */
1440 CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
1441 timed_out = 1;
1442 }
1443
1444 ack = 0;
1445 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1446 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1447
1448 mutex_enter(&ctd->cond_contract.ct_lock);
1449
1450 ASSERT(ctd->cond_dip == dip);
1451
1452 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
1453 mutex_exit(&ctd->cond_contract.ct_lock);
1454 continue;
1455 }
1456 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
1457 mutex_exit(&ctd->cond_contract.ct_lock);
1458 continue;
1459 }
1460
1461 /* skip if non-negotiable contract */
1462 if (ctd->cond_noneg) {
1463 mutex_exit(&ctd->cond_contract.ct_lock);
1464 continue;
1465 }
1466
1467 ASSERT(ctd->cond_currev_type == evtype);
1468 if (ctd->cond_currev_ack == CT_NACK) {
1469 CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
1470 f, (void *)dip));
1471 mutex_exit(&ctd->cond_contract.ct_lock);
1472 return (CT_NACK);
1473 } else if (ctd->cond_currev_ack == CT_ACK) {
1474 ack = 1;
1475 CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
1476 f, (void *)dip));
1477 }
1478 mutex_exit(&ctd->cond_contract.ct_lock);
1479 }
1480
1481 if (ack) {
1482 result = CT_ACK;
1483 CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
1484 } else if (timed_out) {
1485 result = CT_NONE;
1486 CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
1487 f, (void *)dip));
1488 } else {
1489 CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
1490 f, (void *)dip));
1491 }
1492
1493
1494 return (result);
1495 }
1496
1497 /*
1498 * Determines the current state of a device (i.e a devinfo node
1499 */
1500 static int
get_state(dev_info_t * dip)1501 get_state(dev_info_t *dip)
1502 {
1503 if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
1504 return (CT_DEV_EV_OFFLINE);
1505 else if (DEVI_IS_DEVICE_DEGRADED(dip))
1506 return (CT_DEV_EV_DEGRADED);
1507 else
1508 return (CT_DEV_EV_ONLINE);
1509 }
1510
1511 /*
1512 * Sets the current state of a device in a device contract
1513 */
1514 static void
set_cond_state(dev_info_t * dip)1515 set_cond_state(dev_info_t *dip)
1516 {
1517 uint_t state = get_state(dip);
1518 cont_device_t *ctd;
1519
1520 /* verify that barrier is held */
1521 ASSERT(ct_barrier_held(dip));
1522
1523 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1524 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1525 mutex_enter(&ctd->cond_contract.ct_lock);
1526 ASSERT(ctd->cond_dip == dip);
1527 ctd->cond_state = state;
1528 mutex_exit(&ctd->cond_contract.ct_lock);
1529 }
1530 }
1531
1532 /*
1533 * Core routine called by event-specific routines when an event occurs.
 * Determines if an event should be published, and if it is to be
1535 * published, whether a negotiation should take place. Also implements
1536 * NEGEND events which publish the final disposition of an event after
1537 * negotiations are complete.
1538 *
1539 * When an event occurs on a minor node, this routine walks the list of
1540 * contracts hanging off a devinfo node and for each contract on the affected
1541 * dip, evaluates the following cases
1542 *
1543 * a. an event that is synchronous, breaks the contract and NONEG not set
1544 * - bumps up the outstanding negotiation counts on the dip
1545 * - marks the dip as undergoing negotiation (devi_ct_neg)
1546 * - event of type CTE_NEG is published
1547 * b. an event that is synchronous, breaks the contract and NONEG is set
1548 * - sets the final result to CT_NACK, event is blocked
1549 * - does not publish an event
1550 * c. event is asynchronous and breaks the contract
 *	- publishes a critical event irrespective of whether the NONEG
1552 * flag is set, since the contract will be broken and contract
1553 * owner needs to be informed.
1554 * d. No contract breakage but the owner has subscribed to the event
1555 * - publishes the event irrespective of the NONEG event as the
1556 * owner has explicitly subscribed to the event.
1557 * e. NEGEND event
 *	- publishes a critical event. Should only be doing this
 *	  if NONEG is not set.
1560 * f. all other events
1561 * - Since a contract is not broken and this event has not been
 *	  subscribed to, this event does not need to be published for
 *	  this contract.
1564 *
1565 * Once an event is published, what happens next depends on the type of
1566 * event:
1567 *
1568 * a. NEGEND event
1569 * - cleanup all state associated with the preceding negotiation
1570 * and return CT_ACK to the caller of contract_device_publish()
1571 * b. NACKed event
1572 * - One or more contracts had the NONEG term, so the event was
1573 * blocked. Return CT_NACK to the caller.
1574 * c. Negotiated event
1575 * - Call wait_for_acks() to wait for responses from contract
1576 * holders. The end result is either CT_ACK (event is permitted),
1577 * CT_NACK (event is blocked) or CT_NONE (no contract owner)
1578 * responded. This result is returned back to the caller.
1579 * d. All other events
1580 * - If the event was asynchronous (i.e. not negotiated) or
1581 * a contract was not broken return CT_ACK to the caller.
1582 */
1583 static uint_t
contract_device_publish(dev_info_t * dip,dev_t dev,int spec_type,uint_t evtype,nvlist_t * tnvl)1584 contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
1585 uint_t evtype, nvlist_t *tnvl)
1586 {
1587 cont_device_t *ctd;
1588 uint_t result = CT_NONE;
1589 uint64_t evid = 0;
1590 uint64_t nevid = 0;
1591 char *path = NULL;
1592 int negend;
1593 int match;
1594 int sync = 0;
1595 contract_t *ct;
1596 ct_kevent_t *event;
1597 nvlist_t *nvl;
1598 int broken = 0;
1599
1600 ASSERT(dip);
1601 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
1602 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
1603 (spec_type == S_IFBLK || spec_type == S_IFCHR));
1604 ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
1605
1606 /* Is this a synchronous state change ? */
1607 if (evtype != CT_EV_NEGEND) {
1608 sync = is_sync_neg(get_state(dip), evtype);
1609 /* NOP if unsupported transition */
1610 if (sync == -2 || sync == -1) {
1611 DEVI(dip)->devi_flags |= DEVI_CT_NOP;
1612 result = (sync == -2) ? CT_ACK : CT_NONE;
1613 goto out;
1614 }
1615 CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
1616 sync ? "" : " not"));
1617 } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
1618 DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
1619 result = CT_ACK;
1620 goto out;
1621 }
1622
1623 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1624 (void) ddi_pathname(dip, path);
1625
1626 mutex_enter(&(DEVI(dip)->devi_ct_lock));
1627
1628 /*
1629 * Negotiation end - set the state of the device in the contract
1630 */
1631 if (evtype == CT_EV_NEGEND) {
1632 CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
1633 set_cond_state(dip);
1634 }
1635
1636 /*
1637 * If this device didn't go through negotiation, don't publish
1638 * a NEGEND event - simply release the barrier to allow other
1639 * device events in.
1640 */
1641 negend = 0;
1642 if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
1643 CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
1644 ct_barrier_release(dip);
1645 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1646 result = CT_ACK;
1647 goto out;
1648 } else if (evtype == CT_EV_NEGEND) {
1649 /*
1650 * There are negotiated contract breakages that
1651 * need a NEGEND event
1652 */
1653 ASSERT(ct_barrier_held(dip));
1654 negend = 1;
1655 CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
1656 } else {
1657 /*
1658 * This is a new event, not a NEGEND event. Wait for previous
1659 * contract events to complete.
1660 */
1661 ct_barrier_acquire(dip);
1662 }
1663
1664
1665 match = 0;
1666 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1667 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1668
1669 ctid_t ctid;
1670 size_t len = strlen(path);
1671
1672 mutex_enter(&ctd->cond_contract.ct_lock);
1673
1674 ASSERT(ctd->cond_dip == dip);
1675 ASSERT(ctd->cond_minor);
1676 ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
1677 ctd->cond_minor[len] == ':');
1678
1679 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
1680 mutex_exit(&ctd->cond_contract.ct_lock);
1681 continue;
1682 }
1683 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
1684 mutex_exit(&ctd->cond_contract.ct_lock);
1685 continue;
1686 }
1687
1688 /* We have a matching contract */
1689 match = 1;
1690 ctid = ctd->cond_contract.ct_id;
1691 CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
1692 ctid));
1693
1694 /*
1695 * There are 4 possible cases
1696 * 1. A contract is broken (dev not in acceptable state) and
1697 * the state change is synchronous - start negotiation
1698 * by sending a CTE_NEG critical event.
1699 * 2. A contract is broken and the state change is
1700 * asynchronous - just send a critical event and
1701 * break the contract.
1702 * 3. Contract is not broken, but consumer has subscribed
1703 * to the event as a critical or informative event
1704 * - just send the appropriate event
1705 * 4. contract waiting for negend event - just send the critical
1706 * NEGEND event.
1707 */
1708 broken = 0;
1709 if (!negend && !(evtype & ctd->cond_aset)) {
1710 broken = 1;
1711 CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
1712 ctid));
1713 }
1714
1715 /*
1716 * Don't send event if
1717 * - contract is not broken AND
1718 * - contract holder has not subscribed to this event AND
1719 * - contract not waiting for a NEGEND event
1720 */
1721 if (!broken && !EVSENDP(ctd, evtype) &&
1722 !ctd->cond_neg) {
1723 CT_DEBUG((CE_NOTE, "contract_device_publish(): "
1724 "contract (%d): no publish reqd: event %d",
1725 ctd->cond_contract.ct_id, evtype));
1726 mutex_exit(&ctd->cond_contract.ct_lock);
1727 continue;
1728 }
1729
1730 /*
1731 * Note: need to kmem_zalloc() the event so mutexes are
1732 * initialized automatically
1733 */
1734 ct = &ctd->cond_contract;
1735 event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
1736 event->cte_type = evtype;
1737
1738 if (broken && sync) {
1739 CT_DEBUG((CE_NOTE, "publish: broken + sync: "
1740 "ctid: %d", ctid));
1741 ASSERT(!negend);
1742 ASSERT(ctd->cond_currev_id == 0);
1743 ASSERT(ctd->cond_currev_type == 0);
1744 ASSERT(ctd->cond_currev_ack == 0);
1745 ASSERT(ctd->cond_neg == 0);
1746 if (ctd->cond_noneg) {
1747 /* Nothing to publish. Event has been blocked */
1748 CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
1749 "not publishing blocked ev: ctid: %d",
1750 ctid));
1751 result = CT_NACK;
1752 kmem_free(event, sizeof (ct_kevent_t));
1753 mutex_exit(&ctd->cond_contract.ct_lock);
1754 continue;
1755 }
1756 event->cte_flags = CTE_NEG; /* critical neg. event */
1757 ctd->cond_currev_type = event->cte_type;
1758 ct_barrier_incr(dip);
1759 DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
1760 ctd->cond_neg = 1;
1761 } else if (broken && !sync) {
1762 CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
1763 ctid));
1764 ASSERT(!negend);
1765 ASSERT(ctd->cond_currev_id == 0);
1766 ASSERT(ctd->cond_currev_type == 0);
1767 ASSERT(ctd->cond_currev_ack == 0);
1768 ASSERT(ctd->cond_neg == 0);
1769 event->cte_flags = 0; /* critical event */
1770 } else if (EVSENDP(ctd, event->cte_type)) {
1771 CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
1772 ctid));
1773 ASSERT(!negend);
1774 ASSERT(ctd->cond_currev_id == 0);
1775 ASSERT(ctd->cond_currev_type == 0);
1776 ASSERT(ctd->cond_currev_ack == 0);
1777 ASSERT(ctd->cond_neg == 0);
1778 event->cte_flags = EVINFOP(ctd, event->cte_type) ?
1779 CTE_INFO : 0;
1780 } else if (ctd->cond_neg) {
1781 CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
1782 ASSERT(negend);
1783 ASSERT(ctd->cond_noneg == 0);
1784 nevid = ctd->cond_contract.ct_nevent ?
1785 ctd->cond_contract.ct_nevent->cte_id : 0;
1786 ASSERT(ctd->cond_currev_id == nevid);
1787 event->cte_flags = 0; /* NEGEND is always critical */
1788 ctd->cond_currev_id = 0;
1789 ctd->cond_currev_type = 0;
1790 ctd->cond_currev_ack = 0;
1791 ctd->cond_neg = 0;
1792 } else {
1793 CT_DEBUG((CE_NOTE, "publish: not publishing event for "
1794 "ctid: %d, evtype: %d",
1795 ctd->cond_contract.ct_id, event->cte_type));
1796 ASSERT(!negend);
1797 ASSERT(ctd->cond_currev_id == 0);
1798 ASSERT(ctd->cond_currev_type == 0);
1799 ASSERT(ctd->cond_currev_ack == 0);
1800 ASSERT(ctd->cond_neg == 0);
1801 kmem_free(event, sizeof (ct_kevent_t));
1802 mutex_exit(&ctd->cond_contract.ct_lock);
1803 continue;
1804 }
1805
1806 nvl = NULL;
1807 if (tnvl) {
1808 VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
1809 if (negend) {
1810 int32_t newct = 0;
1811 ASSERT(ctd->cond_noneg == 0);
1812 VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
1813 == 0);
1814 VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
1815 &newct) == 0);
1816 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
1817 newct == 1 ? 0 :
1818 ctd->cond_contract.ct_id) == 0);
1819 CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
1820 "CTS_NEVID: %llu, CTS_NEWCT: %s",
1821 ctid, (unsigned long long)nevid,
1822 newct ? "success" : "failure"));
1823
1824 }
1825 }
1826
1827 if (ctd->cond_neg) {
1828 ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
1829 ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
1830 ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
1831 ctd->cond_contract.ct_qtime.ctm_start =
1832 ctd->cond_contract.ct_ntime.ctm_start;
1833 }
1834
1835 /*
1836 * by holding the dip's devi_ct_lock we ensure that
1837 * all ACK/NACKs are held up until we have finished
1838 * publishing to all contracts.
1839 */
1840 mutex_exit(&ctd->cond_contract.ct_lock);
1841 evid = cte_publish_all(ct, event, nvl, NULL);
1842 mutex_enter(&ctd->cond_contract.ct_lock);
1843
1844 if (ctd->cond_neg) {
1845 ASSERT(!negend);
1846 ASSERT(broken);
1847 ASSERT(sync);
1848 ASSERT(!ctd->cond_noneg);
1849 CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
1850 ": %d", ctid));
1851 ctd->cond_currev_id = evid;
1852 } else if (negend) {
1853 ctd->cond_contract.ct_ntime.ctm_start = -1;
1854 ctd->cond_contract.ct_qtime.ctm_start = -1;
1855 }
1856 mutex_exit(&ctd->cond_contract.ct_lock);
1857 }
1858
1859 /*
1860 * If "negend" set counter back to initial state (-1) so that
1861 * other events can be published. Also clear the negotiation flag
1862 * on dip.
1863 *
1864 * 0 .. n are used for counting.
1865 * -1 indicates counter is available for use.
1866 */
1867 if (negend) {
1868 /*
1869 * devi_ct_count not necessarily 0. We may have
1870 * timed out in which case, count will be non-zero.
1871 */
1872 ct_barrier_release(dip);
1873 DEVI(dip)->devi_ct_neg = 0;
1874 CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
1875 (void *)dip));
1876 } else if (DEVI(dip)->devi_ct_neg) {
1877 ASSERT(match);
1878 ASSERT(!ct_barrier_empty(dip));
1879 CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
1880 DEVI(dip)->devi_ct_count, (void *)dip));
1881 } else {
1882 /*
1883 * for non-negotiated events or subscribed events or no
1884 * matching contracts
1885 */
1886 ASSERT(ct_barrier_empty(dip));
1887 ASSERT(DEVI(dip)->devi_ct_neg == 0);
1888 CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
1889 "dip=%p", (void *)dip));
1890
1891 /*
1892 * only this function when called from contract_device_negend()
1893 * can reset the counter to READY state i.e. -1. This function
1894 * is so called for every event whether a NEGEND event is needed
1895 * or not, but the negend event is only published if the event
1896 * whose end they signal is a negotiated event for the contract.
1897 */
1898 }
1899
1900 if (!match) {
1901 /* No matching contracts */
1902 CT_DEBUG((CE_NOTE, "publish: No matching contract"));
1903 result = CT_NONE;
1904 } else if (result == CT_NACK) {
1905 /* a non-negotiable contract exists and this is a neg. event */
1906 CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
1907 (void) wait_for_acks(dip, dev, spec_type, evtype);
1908 } else if (DEVI(dip)->devi_ct_neg) {
		/* one or more contracts going through negotiations */
1910 CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
1911 result = wait_for_acks(dip, dev, spec_type, evtype);
1912 } else {
1913 /* no negotiated contracts or no broken contracts or NEGEND */
1914 CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
1915 result = CT_ACK;
1916 }
1917
1918 /*
1919 * Release the lock only now so that the only point where we
1920 * drop the lock is in wait_for_acks(). This is so that we don't
1921 * miss cv_signal/cv_broadcast from contract holders
1922 */
1923 CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
1924 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1925
1926 out:
1927 nvlist_free(tnvl);
1928 if (path)
1929 kmem_free(path, MAXPATHLEN);
1930
1931
1932 CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
1933 return (result);
1934 }
1935
1936
1937 /*
1938 * contract_device_offline
1939 *
1940 * Event publishing routine called by I/O framework when a device is offlined.
1941 */
1942 ct_ack_t
contract_device_offline(dev_info_t * dip,dev_t dev,int spec_type)1943 contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
1944 {
1945 nvlist_t *nvl;
1946 uint_t result;
1947 uint_t evtype;
1948
1949 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1950
1951 evtype = CT_DEV_EV_OFFLINE;
1952 result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
1953
1954 /*
1955 * If a contract offline is NACKED, the framework expects us to call
1956 * NEGEND ourselves, since we know the final result
1957 */
1958 if (result == CT_NACK) {
1959 contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
1960 }
1961
1962 return (result);
1963 }
1964
1965 /*
1966 * contract_device_degrade
1967 *
1968 * Event publishing routine called by I/O framework when a device
1969 * moves to degrade state.
1970 */
1971 /*ARGSUSED*/
1972 void
contract_device_degrade(dev_info_t * dip,dev_t dev,int spec_type)1973 contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
1974 {
1975 nvlist_t *nvl;
1976 uint_t evtype;
1977
1978 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1979
1980 evtype = CT_DEV_EV_DEGRADED;
1981 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
1982 }
1983
1984 /*
1985 * contract_device_undegrade
1986 *
1987 * Event publishing routine called by I/O framework when a device
1988 * moves from degraded state to online state.
1989 */
1990 /*ARGSUSED*/
1991 void
contract_device_undegrade(dev_info_t * dip,dev_t dev,int spec_type)1992 contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
1993 {
1994 nvlist_t *nvl;
1995 uint_t evtype;
1996
1997 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1998
1999 evtype = CT_DEV_EV_ONLINE;
2000 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
2001 }
2002
2003 /*
2004 * For all contracts which have undergone a negotiation (because the device
2005 * moved out of the acceptable state for that contract and the state
2006 * change is synchronous i.e. requires negotiation) this routine publishes
2007 * a CT_EV_NEGEND event with the final disposition of the event.
2008 *
2009 * This event is always a critical event.
2010 */
2011 void
contract_device_negend(dev_info_t * dip,dev_t dev,int spec_type,int result)2012 contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
2013 {
2014 nvlist_t *nvl;
2015 uint_t evtype;
2016
2017 ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
2018
2019 CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
2020 "dip: %p", result, (void *)dip));
2021
2022 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2023 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
2024 result == CT_EV_SUCCESS ? 1 : 0) == 0);
2025
2026 evtype = CT_EV_NEGEND;
2027 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
2028
2029 CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
2030 (void *)dip));
2031 }
2032
2033 /*
2034 * Wrapper routine called by other subsystems (such as LDI) to start
2035 * negotiations when a synchronous device state change occurs.
2036 * Returns CT_ACK or CT_NACK.
2037 */
2038 ct_ack_t
contract_device_negotiate(dev_info_t * dip,dev_t dev,int spec_type,uint_t evtype)2039 contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
2040 uint_t evtype)
2041 {
2042 int result;
2043
2044 ASSERT(dip);
2045 ASSERT(dev != NODEV);
2046 ASSERT(dev != DDI_DEV_T_ANY);
2047 ASSERT(dev != DDI_DEV_T_NONE);
2048 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2049
2050 result = CT_NACK;
2051 switch (evtype) {
2052 case CT_DEV_EV_OFFLINE:
2053 result = contract_device_offline(dip, dev, spec_type);
2054 break;
2055 default:
2056 cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
2057 "not supported: event (%d) for dev_t (%lu) and spec (%d), "
2058 "dip (%p)", evtype, dev, spec_type, (void *)dip);
2059 break;
2060 }
2061
2062 return (result);
2063 }
2064
2065 /*
2066 * A wrapper routine called by other subsystems (such as the LDI) to
2067 * finalize event processing for a state change event. For synchronous
2068 * state changes, this publishes NEGEND events. For asynchronous i.e.
2069 * non-negotiable events this publishes the event.
2070 */
2071 void
contract_device_finalize(dev_info_t * dip,dev_t dev,int spec_type,uint_t evtype,int ct_result)2072 contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
2073 uint_t evtype, int ct_result)
2074 {
2075 ASSERT(dip);
2076 ASSERT(dev != NODEV);
2077 ASSERT(dev != DDI_DEV_T_ANY);
2078 ASSERT(dev != DDI_DEV_T_NONE);
2079 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2080
2081 switch (evtype) {
2082 case CT_DEV_EV_OFFLINE:
2083 contract_device_negend(dip, dev, spec_type, ct_result);
2084 break;
2085 case CT_DEV_EV_DEGRADED:
2086 contract_device_degrade(dip, dev, spec_type);
2087 contract_device_negend(dip, dev, spec_type, ct_result);
2088 break;
2089 case CT_DEV_EV_ONLINE:
2090 contract_device_undegrade(dip, dev, spec_type);
2091 contract_device_negend(dip, dev, spec_type, ct_result);
2092 break;
2093 default:
2094 cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
2095 "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
2096 evtype, dev, spec_type, (void *)dip);
2097 break;
2098 }
2099 }
2100
2101 /*
2102 * Called by I/O framework when a devinfo node is freed to remove the
2103 * association between a devinfo node and its contracts.
2104 */
2105 void
contract_device_remove_dip(dev_info_t * dip)2106 contract_device_remove_dip(dev_info_t *dip)
2107 {
2108 cont_device_t *ctd;
2109 cont_device_t *next;
2110 contract_t *ct;
2111
2112 mutex_enter(&(DEVI(dip)->devi_ct_lock));
2113 ct_barrier_wait_for_release(dip);
2114
2115 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
2116 next = list_next(&(DEVI(dip)->devi_ct), ctd);
2117 list_remove(&(DEVI(dip)->devi_ct), ctd);
2118 ct = &ctd->cond_contract;
2119 /*
2120 * Unlink the dip associated with this contract
2121 */
2122 mutex_enter(&ct->ct_lock);
2123 ASSERT(ctd->cond_dip == dip);
2124 ctd->cond_dip = NULL; /* no longer linked to dip */
2125 contract_rele(ct); /* remove hold for dip linkage */
2126 CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
2127 "ctid: %d", ct->ct_id));
2128 mutex_exit(&ct->ct_lock);
2129 }
2130 ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
2131 mutex_exit(&(DEVI(dip)->devi_ct_lock));
2132 }
2133
2134 /*
2135 * Barrier related routines
2136 */
2137 static void
ct_barrier_acquire(dev_info_t * dip)2138 ct_barrier_acquire(dev_info_t *dip)
2139 {
2140 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2141 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
2142 while (DEVI(dip)->devi_ct_count != -1)
2143 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2144 DEVI(dip)->devi_ct_count = 0;
2145 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
2146 }
2147
2148 static void
ct_barrier_release(dev_info_t * dip)2149 ct_barrier_release(dev_info_t *dip)
2150 {
2151 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2152 ASSERT(DEVI(dip)->devi_ct_count != -1);
2153 DEVI(dip)->devi_ct_count = -1;
2154 cv_broadcast(&(DEVI(dip)->devi_ct_cv));
2155 CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
2156 }
2157
2158 static int
ct_barrier_held(dev_info_t * dip)2159 ct_barrier_held(dev_info_t *dip)
2160 {
2161 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2162 return (DEVI(dip)->devi_ct_count != -1);
2163 }
2164
2165 static int
ct_barrier_empty(dev_info_t * dip)2166 ct_barrier_empty(dev_info_t *dip)
2167 {
2168 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2169 ASSERT(DEVI(dip)->devi_ct_count != -1);
2170 return (DEVI(dip)->devi_ct_count == 0);
2171 }
2172
2173 static void
ct_barrier_wait_for_release(dev_info_t * dip)2174 ct_barrier_wait_for_release(dev_info_t *dip)
2175 {
2176 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2177 while (DEVI(dip)->devi_ct_count != -1)
2178 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2179 }
2180
2181 static void
ct_barrier_decr(dev_info_t * dip)2182 ct_barrier_decr(dev_info_t *dip)
2183 {
2184 CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d",
2185 DEVI(dip)->devi_ct_count));
2186
2187 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2188 ASSERT(DEVI(dip)->devi_ct_count > 0);
2189
2190 DEVI(dip)->devi_ct_count--;
2191 if (DEVI(dip)->devi_ct_count == 0) {
2192 cv_broadcast(&DEVI(dip)->devi_ct_cv);
2193 CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
2194 }
2195 }
2196
2197 static void
ct_barrier_incr(dev_info_t * dip)2198 ct_barrier_incr(dev_info_t *dip)
2199 {
2200 ASSERT(ct_barrier_held(dip));
2201 DEVI(dip)->devi_ct_count++;
2202 }
2203
2204 static int
ct_barrier_wait_for_empty(dev_info_t * dip,int secs)2205 ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
2206 {
2207 clock_t abstime;
2208
2209 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2210
2211 abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
2212 while (DEVI(dip)->devi_ct_count) {
2213 if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
2214 &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
2215 return (-1);
2216 }
2217 }
2218 return (0);
2219 }
2220