xref: /titanic_41/usr/src/uts/sun4v/io/mdeg.c (revision 0bc07884d74faf3093bc1ed2c66a29745a0d5604)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * MD Event Generator (MDEG) Module
29  */
30 
31 #include <sys/machsystm.h>
32 #include <sys/taskq.h>
33 #include <sys/disp.h>
34 #include <sys/cmn_err.h>
35 #include <sys/note.h>
36 
37 #include <sys/mdeg.h>
38 #include <sys/mach_descrip.h>
39 #include <sys/mdesc.h>
40 
/*
 * A single client registration.
 *
 * One of these occupies each slot of the client table (mdeg.tbl).
 * A slot is only treated as a live registration while 'valid' is set.
 * The 'hdl' field is the exception: mdeg_unregister() keeps it when it
 * clears the rest of the slot, so that the reuse counter encoded in the
 * handle keeps advancing the next time the slot is handed out.
 */
typedef struct mdeg_clnt {
	boolean_t		valid;		/* structure is in active use */
	mdeg_node_match_t	*nmatch;	/* node match filter */
	mdeg_node_spec_t	*pspec;		/* parent match filter */
	mdeg_cb_t		cb;		/* the client callback */
	caddr_t			cb_arg;		/* argument to the callback */
	uint64_t		magic;		/* MDEG_MAGIC while registered */
	mdeg_handle_t		hdl;		/* handle assigned by MDEG */
} mdeg_clnt_t;
53 
/*
 * Global MDEG data
 *
 * Locking Strategy:
 *
 *   mdeg.lock - lock used to synchronize system-wide MD updates. An
 *	MD update must be treated as an atomic event. The lock is
 *	taken when notification arrives that a new MD is available,
 *	and it is held until all clients have been notified.
 *
 *   mdeg.rwlock - lock used to synchronize access to the table of
 *	registered clients. The reader lock must be held when looking
 *	up client information in the table. The writer lock must be
 *	held when modifying any client information.
 *
 * Lock ordering: every path in this file that takes both locks takes
 * mdeg.rwlock first and mdeg.lock second.
 */
static struct mdeg {
	taskq_t 	*taskq;		/* for internal processing */
	boolean_t	enabled;	/* enable/disable taskq processing */
	kmutex_t	lock;		/* synchronize MD updates */
	md_t		*md_prev;	/* previous MD */
	md_t		*md_curr;	/* current MD */
	mdeg_clnt_t	*tbl;		/* table of registered clients */
	krwlock_t	rwlock;		/* client table lock */
	uint_t		maxclnts;	/* client table size (in entries) */
	uint_t		nclnts;		/* current number of clients */
} mdeg;
80 
/*
 * Debugging routines
 *
 * MDEG_DBG expands to an unbraced "if (...) printf", so it must always
 * be used as a complete statement inside braces. Placing it directly
 * in front of an "else" would bind that "else" to the macro's hidden
 * "if".
 *
 * In non-DEBUG builds MDEG_DUMP_CLNT expands to nothing, which turns
 * "MDEG_DUMP_CLNT(clnt);" into the no-op expression statement
 * "(clnt);", and MDEG_DUMP_TABLE() expands to an empty statement.
 */
#ifdef DEBUG
uint_t mdeg_debug = 0x0;	/* set non-zero to enable MDEG_DBG output */

static void mdeg_dump_clnt(mdeg_clnt_t *clnt);
static void mdeg_dump_table(void);

#define	MDEG_DBG		if (mdeg_debug) printf
#define	MDEG_DUMP_CLNT		mdeg_dump_clnt
#define	MDEG_DUMP_TABLE		mdeg_dump_table

#else /* DEBUG */

#define	MDEG_DBG		_NOTE(CONSTCOND) if (0) printf
#define	MDEG_DUMP_CLNT
#define	MDEG_DUMP_TABLE()

#endif /* DEBUG */
101 
/*
 * Global constants
 *
 * NOTE(review): MDEG_MAX_TASKQ_THR is passed as the fifth argument of
 * taskq_create() in mdeg_init(), while the thread-count argument there
 * is 1. Confirm against taskq_create(9F) that this value really bounds
 * the number of threads rather than the number of preallocated entries.
 */
#define	MDEG_MAX_TASKQ_THR	512	/* maximum number of taskq threads */
#define	MDEG_MAX_CLNTS_INIT	64	/* initial client table size */

/* stored in mdeg_clnt_t.magic at registration, verified on lookup */
#define	MDEG_MAGIC		0x4D4445475F48444Cull	/* 'MDEG_HDL' */
109 
/*
 * A client handle is a 64 bit value with two pieces of
 * information encoded in it. The upper 32 bits are the
 * index into the table of a particular client structure.
 * The lower 32 bits are a counter that is incremented
 * each time a client structure is reused.
 *
 * MDEG_ALLOC_HDL(_idx, _count) builds the handle for table slot _idx
 *	from the slot's previous counter value _count; the new counter
 *	is (_count + 1) truncated to 32 bits.
 * MDEG_HDL2IDX(hdl) extracts the table index from a handle.
 * MDEG_HDL2COUNT(hdl) extracts the reuse counter from a handle.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (for example "a | b" or "c ? x : y") are evaluated as a unit before
 * the cast, shift or mask is applied.
 */
#define	MDEG_IDX_SHIFT			32
#define	MDEG_COUNT_MASK			0xfffffffful

#define	MDEG_ALLOC_HDL(_idx, _count)	\
	(((uint64_t)(_idx) << MDEG_IDX_SHIFT) | \
	(((uint64_t)(_count) + 1) & MDEG_COUNT_MASK))
#define	MDEG_HDL2IDX(hdl)		((hdl) >> MDEG_IDX_SHIFT)
#define	MDEG_HDL2COUNT(hdl)		((hdl) & MDEG_COUNT_MASK)
125 
#ifdef DEBUG
/*
 * Marker written over the tail of a debug string that did not fit in
 * its buffer. It is only referenced by the DEBUG-only string helpers
 * (mdeg_spec_str and mdeg_match_str), so it is only defined in DEBUG
 * builds to avoid an unused static in production kernels.
 */
static const char trunc_str[] = " ... }";
#endif /* DEBUG */
127 
/*
 * Utility routines
 *
 * Forward declarations for static helpers defined later in this file.
 */
/* find (or make room for) an unused client table slot */
static mdeg_clnt_t *mdeg_alloc_clnt(void);
/* taskq callback: diff the MDs for one client and invoke its callback */
static void mdeg_notify_client(void *);
/* locate the MD node that satisfies a client's node specification */
static mde_cookie_t mdeg_find_start_node(md_t *, mdeg_node_spec_t *);
/* test a single MD node against every property in a specification */
static boolean_t mdeg_node_spec_match(md_t *, mde_cookie_t, mdeg_node_spec_t *);
/* copy the added/removed/matched lists of a diff into a result */
static void mdeg_get_diff_results(md_diff_cookie_t, mdeg_result_t *);
136 
137 int
138 mdeg_init(void)
139 {
140 	int	tblsz;
141 
142 	/*
143 	 * Grab the current MD
144 	 */
145 	if ((mdeg.md_curr = md_get_handle()) == NULL) {
146 		cmn_err(CE_WARN, "unable to cache snapshot of MD");
147 		return (-1);
148 	}
149 
150 	/*
151 	 * Initialize table of registered clients
152 	 */
153 	mdeg.maxclnts = MDEG_MAX_CLNTS_INIT;
154 
155 	tblsz = mdeg.maxclnts * sizeof (mdeg_clnt_t);
156 	mdeg.tbl = kmem_zalloc(tblsz, KM_SLEEP);
157 
158 	rw_init(&mdeg.rwlock, NULL, RW_DRIVER, NULL);
159 
160 	mdeg.nclnts = 0;
161 
162 	/*
163 	 * Initialize global lock
164 	 */
165 	mutex_init(&mdeg.lock, NULL, MUTEX_DRIVER, NULL);
166 
167 	/*
168 	 * Initialize the task queue
169 	 */
170 	mdeg.taskq = taskq_create("mdeg_taskq", 1, minclsyspri, 1,
171 	    MDEG_MAX_TASKQ_THR, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
172 
173 	/* ready to begin handling clients */
174 	mdeg.enabled = B_TRUE;
175 
176 	return (0);
177 }
178 
179 void
180 mdeg_fini(void)
181 {
182 	/*
183 	 * Flip the enabled switch off to make sure that
184 	 * no events get dispatched while things are being
185 	 * torn down.
186 	 */
187 	mdeg.enabled = B_FALSE;
188 
189 	/* destroy the task queue */
190 	taskq_destroy(mdeg.taskq);
191 
192 	/*
193 	 * Deallocate the table of registered clients
194 	 */
195 	kmem_free(mdeg.tbl, mdeg.maxclnts * sizeof (mdeg_clnt_t));
196 	rw_destroy(&mdeg.rwlock);
197 
198 	/*
199 	 * Free up the cached MDs.
200 	 */
201 	if (mdeg.md_curr)
202 		(void) md_fini_handle(mdeg.md_curr);
203 
204 	if (mdeg.md_prev)
205 		(void) md_fini_handle(mdeg.md_prev);
206 
207 	mutex_destroy(&mdeg.lock);
208 }
209 
210 static mdeg_clnt_t *
211 mdeg_alloc_clnt(void)
212 {
213 	mdeg_clnt_t	*clnt;
214 	int		idx;
215 	mdeg_clnt_t	*newtbl;
216 	uint_t		newmaxclnts;
217 	uint_t		newtblsz;
218 	uint_t		oldtblsz;
219 
220 	ASSERT(RW_WRITE_HELD(&mdeg.rwlock));
221 
222 	/* search for an unused slot in the table */
223 	for (idx = 0; idx < mdeg.maxclnts; idx++) {
224 		clnt = &mdeg.tbl[idx];
225 		if (!clnt->valid) {
226 			break;
227 		}
228 	}
229 
230 	/* found any empty slot */
231 	if (idx != mdeg.maxclnts) {
232 		goto found;
233 	}
234 
235 	/*
236 	 * There was no free space in the table. Grow
237 	 * the table to double its current size.
238 	 */
239 
240 	MDEG_DBG("client table full:\n");
241 	MDEG_DUMP_TABLE();
242 
243 	newmaxclnts = mdeg.maxclnts * 2;
244 	newtblsz = newmaxclnts * sizeof (mdeg_clnt_t);
245 
246 	newtbl = kmem_zalloc(newtblsz, KM_SLEEP);
247 
248 	/* copy old table data to the new table */
249 	oldtblsz = mdeg.maxclnts * sizeof (mdeg_clnt_t);
250 	bcopy(mdeg.tbl, newtbl, oldtblsz);
251 
252 	/*
253 	 * Since the old table was full, the first free entry
254 	 * will be just past the end of the old table data in
255 	 * the new table.
256 	 */
257 	clnt = &newtbl[mdeg.maxclnts];
258 
259 	/* clean up the old table */
260 	kmem_free(mdeg.tbl, oldtblsz);
261 	mdeg.tbl = newtbl;
262 	mdeg.maxclnts = newmaxclnts;
263 
264 found:
265 	ASSERT(clnt->valid == 0);
266 
267 	clnt->hdl = MDEG_ALLOC_HDL(idx, MDEG_HDL2COUNT(clnt->hdl));
268 
269 	return (clnt);
270 }
271 
272 static mdeg_clnt_t *
273 mdeg_get_client(mdeg_handle_t hdl)
274 {
275 	int		idx;
276 	mdeg_clnt_t	*clnt;
277 
278 	idx = MDEG_HDL2IDX(hdl);
279 
280 	/* check if index is out of bounds */
281 	if ((idx < 0) || (idx >= mdeg.maxclnts)) {
282 		MDEG_DBG("mdeg_get_client: index out of bounds\n");
283 		return (NULL);
284 	}
285 
286 	clnt = &mdeg.tbl[idx];
287 
288 	/* check for a valid client */
289 	if (!clnt->valid) {
290 		MDEG_DBG("mdeg_get_client: client is not valid\n");
291 		return (NULL);
292 	}
293 
294 	/* make sure the handle is an exact match */
295 	if (clnt->hdl != hdl) {
296 		MDEG_DBG("mdeg_get_client: bad handle\n");
297 		return (NULL);
298 	}
299 
300 	if (clnt->magic != MDEG_MAGIC) {
301 		MDEG_DBG("mdeg_get_client: bad magic\n");
302 		return (NULL);
303 	}
304 
305 	return (clnt);
306 }
307 
308 /*
309  * Send a notification to a client immediately after it registers.
310  * The result_t is a list of all the nodes that match their specified
311  * nodes of interest, all returned on the added list. This serves
312  * as a base of reference to the client. All future MD updates are
313  * relative to this list.
314  */
315 static int
316 mdeg_notify_client_reg(mdeg_clnt_t *clnt)
317 {
318 	md_t			*mdp = NULL;
319 	mde_str_cookie_t	nname;
320 	mde_str_cookie_t	aname;
321 	mde_cookie_t		startnode;
322 	int			nnodes;
323 	int			nodechk;
324 	mde_cookie_t		*listp = NULL;
325 	mdeg_result_t		*mdeg_res = NULL;
326 	int			rv = MDEG_SUCCESS;
327 
328 	mutex_enter(&mdeg.lock);
329 
330 	/*
331 	 * Handle the special case where the node specification
332 	 * is NULL. In this case, call the client callback without
333 	 * any results. All processing is left to the client.
334 	 */
335 	if (clnt->pspec == NULL) {
336 		/* call the client callback */
337 		(*clnt->cb)(clnt->cb_arg, NULL);
338 		goto done;
339 	}
340 
341 	if ((mdp = md_get_handle()) == NULL) {
342 		cmn_err(CE_WARN, "unable to retrieve current MD");
343 		rv = MDEG_FAILURE;
344 		goto done;
345 	}
346 
347 	startnode = mdeg_find_start_node(mdp, clnt->pspec);
348 	if (startnode == MDE_INVAL_ELEM_COOKIE) {
349 		/* not much we can do */
350 		cmn_err(CE_WARN, "unable to match node specifier");
351 		rv = MDEG_FAILURE;
352 		goto done;
353 	}
354 
355 	/*
356 	 * Use zalloc to provide correct default values for the
357 	 * unused removed, match_prev, and match_curr lists.
358 	 */
359 	mdeg_res = kmem_zalloc(sizeof (mdeg_result_t), KM_SLEEP);
360 
361 	nname = md_find_name(mdp, clnt->nmatch->namep);
362 	aname = md_find_name(mdp, "fwd");
363 
364 	nnodes = md_scan_dag(mdp, startnode, nname, aname, NULL);
365 
366 	if (nnodes == 0) {
367 		MDEG_DBG("mdeg_notify_client_reg: no nodes of interest\n");
368 		rv = MDEG_SUCCESS;
369 		goto done;
370 	} else if (nnodes == -1) {
371 		MDEG_DBG("error scanning DAG\n");
372 		rv = MDEG_FAILURE;
373 		goto done;
374 	}
375 
376 	MDEG_DBG("mdeg_notify_client_reg: %d node%s of interest\n",
377 	    nnodes, (nnodes == 1) ? "" : "s");
378 
379 	/* get the list of nodes of interest */
380 	listp = kmem_alloc(sizeof (mde_cookie_t) * nnodes, KM_SLEEP);
381 	nodechk = md_scan_dag(mdp, startnode, nname, aname, listp);
382 
383 	ASSERT(nodechk == nnodes);
384 
385 	mdeg_res->added.mdp = mdp;
386 	mdeg_res->added.mdep = listp;
387 	mdeg_res->added.nelem = nnodes;
388 
389 	/* call the client callback */
390 	(*clnt->cb)(clnt->cb_arg, mdeg_res);
391 
392 done:
393 	mutex_exit(&mdeg.lock);
394 
395 	if (mdp)
396 		(void) md_fini_handle(mdp);
397 
398 	if (listp)
399 		kmem_free(listp, sizeof (mde_cookie_t) * nnodes);
400 
401 	if (mdeg_res)
402 		kmem_free(mdeg_res, sizeof (mdeg_result_t));
403 
404 	return (rv);
405 }
406 
407 /*
408  * Register to receive an event notification when the system
409  * machine description is updated.
410  *
411  * Passing NULL for the node specification parameter is valid
412  * as long as the match specification is also NULL. In this
413  * case, the client will receive a notification when the MD
414  * has been updated, but the callback will not include any
415  * information. The client is then responsible for obtaining
416  * its own copy of the system MD and performing any processing
417  * manually.
418  */
419 int
420 mdeg_register(mdeg_node_spec_t *pspecp, mdeg_node_match_t *nmatchp,
421     mdeg_cb_t cb, void *cb_arg, mdeg_handle_t *hdlp)
422 {
423 	mdeg_clnt_t	*clnt;
424 
425 	/* should never be called from a callback */
426 	ASSERT(!taskq_member(mdeg.taskq, curthread));
427 
428 	/* node spec and node match must both be valid, or both NULL */
429 	if (((pspecp != NULL) && (nmatchp == NULL)) ||
430 	    ((pspecp == NULL) && (nmatchp != NULL))) {
431 		MDEG_DBG("mdeg_register: invalid parameters\n");
432 		return (MDEG_FAILURE);
433 	}
434 
435 	rw_enter(&mdeg.rwlock, RW_WRITER);
436 
437 	clnt = mdeg_alloc_clnt();
438 
439 	ASSERT(clnt);
440 
441 	/*
442 	 * Fill in the rest of the data
443 	 */
444 	clnt->nmatch = nmatchp;
445 	clnt->pspec = pspecp;
446 	clnt->cb = cb;
447 	clnt->cb_arg = cb_arg;
448 	clnt->magic = MDEG_MAGIC;
449 
450 	/* do this last */
451 	clnt->valid = B_TRUE;
452 
453 	MDEG_DBG("client registered (0x%lx):\n", clnt->hdl);
454 	MDEG_DUMP_CLNT(clnt);
455 
456 	mdeg.nclnts++;
457 
458 	if (mdeg_notify_client_reg(clnt) != MDEG_SUCCESS) {
459 		bzero(clnt, sizeof (mdeg_clnt_t));
460 		rw_exit(&mdeg.rwlock);
461 		return (MDEG_FAILURE);
462 	}
463 
464 	rw_exit(&mdeg.rwlock);
465 
466 	*hdlp = clnt->hdl;
467 
468 	return (MDEG_SUCCESS);
469 }
470 
471 int
472 mdeg_unregister(mdeg_handle_t hdl)
473 {
474 	mdeg_clnt_t	*clnt;
475 	mdeg_handle_t	mdh;
476 
477 	/* should never be called from a callback */
478 	ASSERT(!taskq_member(mdeg.taskq, curthread));
479 
480 	rw_enter(&mdeg.rwlock, RW_WRITER);
481 
482 	/* lookup the client */
483 	if ((clnt = mdeg_get_client(hdl)) == NULL) {
484 		rw_exit(&mdeg.rwlock);
485 		return (MDEG_FAILURE);
486 	}
487 
488 	MDEG_DBG("client unregistered (0x%lx):\n", hdl);
489 	MDEG_DUMP_CLNT(clnt);
490 
491 	/* save the handle to prevent reuse */
492 	mdh = clnt->hdl;
493 	bzero(clnt, sizeof (mdeg_clnt_t));
494 
495 	clnt->hdl = mdh;
496 
497 	mdeg.nclnts--;
498 
499 	rw_exit(&mdeg.rwlock);
500 
501 	return (MDEG_SUCCESS);
502 }
503 
504 /*
505  * Simple algorithm for now, grab the global lock and let all
506  * the clients update themselves in parallel. There is a lot of
507  * room for improvement here. We could eliminate some scans of
508  * the DAG by incrementally scanning at lower levels of the DAG
509  * rather than having each client start its own scan from the root.
510  */
511 void
512 mdeg_notify_clients(void)
513 {
514 	md_t		*md_new;
515 	mdeg_clnt_t	*clnt;
516 	int		idx;
517 	int		nclnt;
518 
519 	rw_enter(&mdeg.rwlock, RW_READER);
520 	mutex_enter(&mdeg.lock);
521 
522 	/*
523 	 * Rotate the MDs
524 	 */
525 	if ((md_new = md_get_handle()) == NULL) {
526 		cmn_err(CE_WARN, "unable to retrieve new MD");
527 		goto done;
528 	}
529 
530 	if (mdeg.md_prev) {
531 		(void) md_fini_handle(mdeg.md_prev);
532 	}
533 
534 	mdeg.md_prev = mdeg.md_curr;
535 	mdeg.md_curr = md_new;
536 
537 	if (mdeg.nclnts == 0) {
538 		MDEG_DBG("mdeg_notify_clients: no clients registered\n");
539 		goto done;
540 	}
541 
542 	/* dispatch the update notification to all clients */
543 	for (idx = 0, nclnt = 0; idx < mdeg.maxclnts; idx++) {
544 		clnt = &mdeg.tbl[idx];
545 
546 		if (!clnt->valid)
547 			continue;
548 
549 		MDEG_DBG("notifying client 0x%lx (%d/%d)\n", clnt->hdl,
550 		    ++nclnt, mdeg.nclnts);
551 
552 		(void) taskq_dispatch(mdeg.taskq, mdeg_notify_client,
553 		    (void *)clnt, TQ_SLEEP);
554 	}
555 
556 	/*
557 	 * Wait for all mdeg_notify_client notifications to
558 	 * finish while we are still holding mdeg.rwlock.
559 	 */
560 	taskq_wait(mdeg.taskq);
561 
562 done:
563 	mutex_exit(&mdeg.lock);
564 	rw_exit(&mdeg.rwlock);
565 }
566 
567 static void
568 mdeg_notify_client(void *arg)
569 {
570 	mdeg_clnt_t		*clnt = (mdeg_clnt_t *)arg;
571 	md_diff_cookie_t	mdd = MD_INVAL_DIFF_COOKIE;
572 	mdeg_result_t		mdeg_res;
573 	mde_cookie_t		md_prev_start;
574 	mde_cookie_t		md_curr_start;
575 
576 	/*
577 	 * mdeg.rwlock must be held as a reader while this function
578 	 * executes. However, we do not need to acquire the lock as a
579 	 * reader here because it is held as a reader by the thread
580 	 * executing mdeg_notify_clients which triggers the execution
581 	 * of this function from a taskq. Since mdeg_notify_clients
582 	 * holds the lock as a reader until the taskq callbacks have
583 	 * completed, it will be held for the life of this function call.
584 	 * Furthermore, we must not attempt to acquire the lock as a
585 	 * reader with rw_enter because if there is a pending writer,
586 	 * we will block, creating a circular deadlock with this function,
587 	 * the writer, and mdeg_notify_clients. Since we do not need
588 	 * to acquire the lock, just assert that it is held.
589 	 */
590 	ASSERT(RW_READ_HELD(&mdeg.rwlock));
591 
592 	if (!mdeg.enabled) {
593 		/* trying to shutdown */
594 		MDEG_DBG("mdeg_notify_client: mdeg disabled, aborting\n");
595 		goto cleanup;
596 	}
597 
598 	/*
599 	 * Handle the special case where the node specification
600 	 * is NULL. In this case, call the client callback without
601 	 * any results. All processing is left to the client.
602 	 */
603 	if (clnt->pspec == NULL) {
604 		/* call the client callback */
605 		(*clnt->cb)(clnt->cb_arg, NULL);
606 
607 		MDEG_DBG("MDEG client callback done\n");
608 		goto cleanup;
609 	}
610 
611 	/* find our start nodes */
612 	md_prev_start = mdeg_find_start_node(mdeg.md_prev, clnt->pspec);
613 	if (md_prev_start == MDE_INVAL_ELEM_COOKIE) {
614 		goto cleanup;
615 	}
616 
617 	md_curr_start = mdeg_find_start_node(mdeg.md_curr, clnt->pspec);
618 	if (md_curr_start == MDE_INVAL_ELEM_COOKIE) {
619 		goto cleanup;
620 	}
621 
622 	/* diff the MDs */
623 	mdd = md_diff_init(mdeg.md_prev, md_prev_start, mdeg.md_curr,
624 	    md_curr_start, clnt->nmatch->namep, clnt->nmatch->matchp);
625 
626 	if (mdd == MD_INVAL_DIFF_COOKIE) {
627 		MDEG_DBG("unable to diff MDs\n");
628 		goto cleanup;
629 	}
630 
631 	/*
632 	 * Cache the results of the diff
633 	 */
634 	mdeg_get_diff_results(mdd, &mdeg_res);
635 
636 	/* call the client callback */
637 	(*clnt->cb)(clnt->cb_arg, &mdeg_res);
638 
639 	MDEG_DBG("MDEG client callback done\n");
640 
641 cleanup:
642 	if (mdd != MD_INVAL_DIFF_COOKIE)
643 		(void) md_diff_fini(mdd);
644 }
645 
646 static mde_cookie_t
647 mdeg_find_start_node(md_t *md, mdeg_node_spec_t *nspec)
648 {
649 	mde_cookie_t		*nodesp;
650 	mde_str_cookie_t	nname;
651 	mde_str_cookie_t	aname;
652 	int			nnodes;
653 	int			idx;
654 
655 	if ((md == NULL) || (nspec == NULL))
656 		return (MDE_INVAL_ELEM_COOKIE);
657 
658 	nname = md_find_name(md, nspec->namep);
659 	aname = md_find_name(md, "fwd");
660 
661 	nnodes = md_scan_dag(md, NULL, nname, aname, NULL);
662 	if (nnodes == 0)
663 		return (MDE_INVAL_ELEM_COOKIE);
664 
665 	nodesp = kmem_alloc(sizeof (mde_cookie_t) * nnodes, KM_SLEEP);
666 
667 	(void) md_scan_dag(md, NULL, nname, aname, nodesp);
668 
669 	for (idx = 0; idx < nnodes; idx++) {
670 
671 		if (mdeg_node_spec_match(md, nodesp[idx], nspec)) {
672 			mde_cookie_t res = nodesp[idx];
673 
674 			kmem_free(nodesp, sizeof (mde_cookie_t) * nnodes);
675 			return (res);
676 		}
677 	}
678 
679 	kmem_free(nodesp, sizeof (mde_cookie_t) * nnodes);
680 	return (MDE_INVAL_ELEM_COOKIE);
681 }
682 
683 static boolean_t
684 mdeg_node_spec_match(md_t *md, mde_cookie_t node, mdeg_node_spec_t *nspec)
685 {
686 	mdeg_prop_spec_t	*prop;
687 
688 	ASSERT(md && nspec);
689 	ASSERT(node != MDE_INVAL_ELEM_COOKIE);
690 
691 	prop = nspec->specp;
692 
693 	while (prop->type != MDET_LIST_END) {
694 
695 		switch (prop->type) {
696 		case MDET_PROP_VAL: {
697 			uint64_t val;
698 
699 			if (md_get_prop_val(md, node, prop->namep, &val) != 0)
700 				return (B_FALSE);
701 
702 			if (prop->ps_val != val)
703 				return (B_FALSE);
704 
705 			break;
706 		}
707 		case MDET_PROP_STR: {
708 			char	*str;
709 
710 			if (md_get_prop_str(md, node, prop->namep, &str) != 0)
711 				return (B_FALSE);
712 
713 			if (strcmp(prop->ps_str, str) != 0)
714 				return (B_FALSE);
715 
716 			break;
717 		}
718 
719 		default:
720 			return (B_FALSE);
721 		}
722 
723 		prop++;
724 	}
725 
726 	return (B_TRUE);
727 }
728 
729 static void
730 mdeg_get_diff_results(md_diff_cookie_t mdd, mdeg_result_t *res)
731 {
732 	/*
733 	 * Cache added nodes.
734 	 */
735 	res->added.mdp = mdeg.md_curr;
736 	res->added.nelem = md_diff_added(mdd, &(res->added.mdep));
737 
738 	if (res->added.nelem == -1) {
739 		bzero(&(res->added), sizeof (mdeg_diff_t));
740 	}
741 
742 	/*
743 	 * Cache removed nodes.
744 	 */
745 	res->removed.mdp = mdeg.md_prev;
746 	res->removed.nelem = md_diff_removed(mdd, &(res->removed.mdep));
747 
748 	if (res->removed.nelem == -1) {
749 		bzero(&(res->removed), sizeof (mdeg_diff_t));
750 	}
751 
752 	/*
753 	 * Cache matching node pairs.
754 	 */
755 	res->match_curr.mdp = mdeg.md_curr;
756 	res->match_prev.mdp = mdeg.md_prev;
757 	res->match_curr.nelem = md_diff_matched(mdd, &(res->match_prev.mdep),
758 	    &(res->match_curr.mdep));
759 	res->match_prev.nelem = res->match_curr.nelem;
760 
761 	if (res->match_prev.nelem == -1) {
762 		bzero(&(res->match_prev), sizeof (mdeg_diff_t));
763 		bzero(&(res->match_curr), sizeof (mdeg_diff_t));
764 	}
765 }
766 
767 #ifdef DEBUG
768 /*
769  * Generate a string that represents the node specifier
770  * structure. Clamp the string length if the specifier
771  * structure contains too much information.
772  *
773  *	General form:
774  *
775  *		<nodename>:{<propname>=<propval>,...}
776  *	e.g.
777  *		vdevice:{name=vsw,reg=0x0}
778  */
779 static void
780 mdeg_spec_str(mdeg_node_spec_t *spec, char *buf, int len)
781 {
782 	mdeg_prop_spec_t	*prop;
783 	int			offset;
784 	boolean_t		first = B_TRUE;
785 	char			*end = buf + len;
786 
787 	offset = snprintf(buf, len, "%s:{", spec->namep);
788 
789 	buf += offset;
790 	len -= offset;
791 	if (len <= 0)
792 		goto trunc;
793 
794 	prop = spec->specp;
795 
796 	while (prop->type != MDET_LIST_END) {
797 
798 		switch (prop->type) {
799 		case MDET_PROP_VAL:
800 			offset = snprintf(buf, len, "%s%s=0x%lx",
801 			    (first) ? "" : ",", prop->namep, prop->ps_val);
802 			buf += offset;
803 			len -= offset;
804 			if (len <= 0)
805 				goto trunc;
806 			break;
807 
808 		case MDET_PROP_STR:
809 			offset = snprintf(buf, len, "%s%s=%s",
810 			    (first) ? "" : ",", prop->namep, prop->ps_str);
811 			buf += offset;
812 			len -= offset;
813 			if (len <= 0)
814 				goto trunc;
815 			break;
816 
817 		default:
818 			(void) snprintf(buf, len, "}");
819 			return;
820 		}
821 
822 		if (first)
823 			first = B_FALSE;
824 		prop++;
825 	}
826 
827 	(void) snprintf(buf, len, "}");
828 	return;
829 
830 trunc:
831 	/* string too long, truncate it */
832 	buf = end - (strlen(trunc_str) + 1);
833 	(void) sprintf(buf, trunc_str);
834 }
835 
836 /*
837  * Generate a string that represents the match structure.
838  * Clamp the string length if the match structure contains
839  * too much information.
840  *
841  *	General form:
842  *
843  *		<nodename>:{<propname>,...}
844  *	e.g.
845  *		nmatch=vport:{reg}
846  */
847 static void
848 mdeg_match_str(mdeg_node_match_t *match, char *buf, int len)
849 {
850 	md_prop_match_t	*prop;
851 	int		offset;
852 	boolean_t	first = B_TRUE;
853 	char		*end = buf + len;
854 
855 	offset = snprintf(buf, len, "%s:{", match->namep);
856 
857 	buf += offset;
858 	len -= offset;
859 	if (len <= 0)
860 		goto trunc;
861 
862 	prop = match->matchp;
863 
864 	while (prop->type != MDET_LIST_END) {
865 		offset = snprintf(buf, len, "%s%s", (first) ? "" : ",",
866 		    prop->namep);
867 		buf += offset;
868 		len -= offset;
869 		if (len <= 0)
870 			goto trunc;
871 
872 		if (first)
873 			first = B_FALSE;
874 		prop++;
875 	}
876 
877 	(void) snprintf(buf, len, "}");
878 	return;
879 
880 trunc:
881 	/* string too long, truncate it */
882 	buf = end - (strlen(trunc_str) + 1);
883 	(void) sprintf(buf, trunc_str);
884 }
885 
886 #define	MAX_FIELD_STR	80
887 
888 static void
889 mdeg_dump_clnt(mdeg_clnt_t *clnt)
890 {
891 	char	str[MAX_FIELD_STR] = "";
892 
893 	if (!clnt->valid) {
894 		MDEG_DBG("  valid=B_FALSE\n");
895 		return;
896 	}
897 
898 	if (clnt->pspec) {
899 		mdeg_spec_str(clnt->pspec, str, MAX_FIELD_STR);
900 		MDEG_DBG("  pspecp=%s\n", str);
901 	}
902 
903 	if (clnt->nmatch) {
904 		mdeg_match_str(clnt->nmatch, str, MAX_FIELD_STR);
905 		MDEG_DBG("  nmatch=%s\n", str);
906 	}
907 }
908 
909 static void
910 mdeg_dump_table(void)
911 {
912 	int		idx;
913 	mdeg_clnt_t	*clnt;
914 
915 	for (idx = 0; idx < mdeg.maxclnts; idx++) {
916 		clnt = &(mdeg.tbl[idx]);
917 
918 		MDEG_DBG("client %d (0x%lx):\n", idx, clnt->hdl);
919 		mdeg_dump_clnt(clnt);
920 	}
921 }
922 #endif /* DEBUG */
923