xref: /illumos-gate/usr/src/cmd/dlmgmtd/dlmgmt_db.c (revision 6faf52448e142b151fa3deade474be359e7c8698)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2023 Oxide Computer Company
25  */
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <syslog.h>
36 #include <zone.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <stropts.h>
40 #include <sys/conf.h>
41 #include <pthread.h>
42 #include <unistd.h>
43 #include <wait.h>
44 #include <libcontract.h>
45 #include <libcontract_priv.h>
46 #include <sys/contract/process.h>
47 #include "dlmgmt_impl.h"
48 
/*
 * Operation carried by a dlmgmt_db_req_t.  Everything other than
 * DLMGMT_DB_OP_READ is treated as a "write" operation by
 * dlmgmt_process_db_req(), i.e. it rewrites the configuration file.
 */
typedef enum dlmgmt_db_op {
	DLMGMT_DB_OP_WRITE,
	DLMGMT_DB_OP_DELETE,
	DLMGMT_DB_OP_READ
} dlmgmt_db_op_t;
54 
/*
 * A single request against a datalink configuration file.  Requests that
 * cannot be completed right away are chained through ls_next on the pending
 * list (dlmgmt_db_req_head/dlmgmt_db_req_tail).
 */
typedef struct dlmgmt_db_req_s {
	struct dlmgmt_db_req_s	*ls_next;	/* next pending request */
	dlmgmt_db_op_t		ls_op;		/* operation to perform */
	/* Link name; left empty when no name was supplied at allocation. */
	char			ls_link[MAXLINKNAMELEN];
	datalink_id_t		ls_linkid;
	/* Zone whose copy of the file is opened for this request. */
	zoneid_t		ls_zoneid;
	uint32_t		ls_flags;	/* Either DLMGMT_ACTIVE or   */
						/* DLMGMT_PERSIST, not both. */
} dlmgmt_db_req_t;
64 
65 /*
66  * List of pending db updates (e.g., because of a read-only filesystem).
67  */
68 static dlmgmt_db_req_t	*dlmgmt_db_req_head = NULL;
69 static dlmgmt_db_req_t	*dlmgmt_db_req_tail = NULL;
70 
71 /*
72  * rewrite_needed is set to B_TRUE by process_link_line() if it encounters a
73  * line with an old format.  This will cause the file being read to be
74  * re-written with the current format.
75  */
76 static boolean_t	rewrite_needed;
77 
78 static int		dlmgmt_db_update(dlmgmt_db_op_t, const char *,
79 			    dlmgmt_link_t *, uint32_t);
80 static int		dlmgmt_process_db_req(dlmgmt_db_req_t *);
81 static int		dlmgmt_process_db_onereq(dlmgmt_db_req_t *, boolean_t);
82 static void		*dlmgmt_db_update_thread(void *);
83 static boolean_t	process_link_line(char *, dlmgmt_link_t *);
84 static int		process_db_write(dlmgmt_db_req_t *, FILE *, FILE *);
85 static int		process_db_read(dlmgmt_db_req_t *, FILE *);
86 static void		generate_link_line(dlmgmt_link_t *, boolean_t, char *);
87 
88 #define	BUFLEN(lim, ptr)	(((lim) > (ptr)) ? ((lim) - (ptr)) : 0)
89 #define	MAXLINELEN		1024
90 
91 typedef void db_walk_func_t(dlmgmt_link_t *);
92 
93 /*
94  * Translator functions to go from dladm_datatype_t to character strings.
95  * Each function takes a pointer to a buffer, the size of the buffer,
96  * the name of the attribute, and the value to be written.  The functions
97  * return the number of bytes written to the buffer.  If the buffer is not big
98  * enough to hold the string representing the value, then nothing is written
99  * and 0 is returned.
100  */
101 typedef size_t write_func_t(char *, size_t, char *, void *);
102 
103 /*
104  * Translator functions to read from a NULL terminated string buffer into
105  * something of the given DLADM_TYPE_*.  The functions each return the number
106  * of bytes read from the string buffer.  If there is an error reading data
107  * from the buffer, then 0 is returned.  It is the caller's responsibility
108  * to free the data allocated by these functions.
109  */
110 typedef size_t read_func_t(char *, void **);
111 
112 typedef struct translator_s {
113 	const char	*type_name;
114 	write_func_t	*write_func;
115 	read_func_t	*read_func;
116 } translator_t;
117 
118 /*
119  * Translator functions, defined later but declared here so that
120  * the translator table can be defined.
121  */
122 static write_func_t	write_str, write_boolean, write_uint64;
123 static read_func_t	read_str, read_boolean, read_int64;
124 
/*
 * Translator table, indexed by dladm_datatype_t.  The position of each entry
 * must therefore match the numeric value of the corresponding DLADM_TYPE_*
 * constant.  type_name is the string that appears in the "<type>," part of a
 * serialized property.
 *
 * Note that the "int" entry writes its value through write_uint64() but
 * reads it back through read_int64().
 */
static translator_t translators[] = {
	{ "string",	write_str,	read_str	},
	{ "boolean",	write_boolean,	read_boolean	},
	{ "int",	write_uint64,	read_int64	}
};

/* Number of entries in translators[]. */
static size_t ntranslators = sizeof (translators) / sizeof (translator_t);
135 
/*
 * A serialized link property has the form "<name>=<type>,<value>;".
 *
 * BASE_PROPERTY_LENGTH(t, n) is the number of characters such a property
 * occupies excluding <value> and the terminating NUL: the property name, the
 * '=' that follows it, the type name, the type/value separator and the
 * trailing delimiter.  The '=' must be counted here because
 * GENERATE_PROPERTY_STRING() emits it; leaving it out makes every "does it
 * fit" check in the write_*() translators one byte too generous.
 *
 * GENERATE_PROPERTY_STRING() formats one property into buf using "conv" as
 * the printf conversion for val, and evaluates to snprintf()'s return value.
 */
#define	LINK_PROPERTY_DELIMINATOR	";"
#define	LINK_PROPERTY_TYPE_VALUE_SEP	","
#define	BASE_PROPERTY_LENGTH(t, n) (strlen(translators[(t)].type_name) +\
				    strlen(LINK_PROPERTY_TYPE_VALUE_SEP) +\
				    strlen(LINK_PROPERTY_DELIMINATOR) +\
				    strlen("=") +\
				    strlen((n)))
#define	GENERATE_PROPERTY_STRING(buf, length, conv, name, type, val) \
	    (snprintf((buf), (length), "%s=%s%s" conv "%s", (name), \
	    translators[(type)].type_name, \
	    LINK_PROPERTY_TYPE_VALUE_SEP, (val), LINK_PROPERTY_DELIMINATOR))
146 
147 /*
148  * Name of the cache file to keep the active <link name, linkid> mapping
149  */
150 char	cachefile[MAXPATHLEN];
151 
/* Path of the persistent datalink configuration file. */
#define	DLMGMT_PERSISTENT_DB_PATH	"/etc/dladm/datalink.conf"
/*
 * Fill "buffer" (which must hold at least MAXPATHLEN bytes) with the path of
 * the persistent configuration file when "persistent" is true, or with the
 * path held in cachefile otherwise.
 *
 * NOTE(review): the expansion already ends in a semicolon, so the macro is
 * only safe as a stand-alone statement; using it as the unbraced body of an
 * if/else would not compile as intended.
 */
#define	DLMGMT_MAKE_FILE_DB_PATH(buffer, persistent)	\
	(void) snprintf((buffer), MAXPATHLEN, "%s", \
	(persistent) ? DLMGMT_PERSISTENT_DB_PATH : cachefile);
156 
157 typedef struct zopen_arg {
158 	const char	*zopen_modestr;
159 	int		*zopen_pipe;
160 	int		zopen_fd;
161 } zopen_arg_t;
162 
163 typedef struct zrename_arg {
164 	const char	*zrename_newname;
165 } zrename_arg_t;
166 
167 typedef union zfoparg {
168 	zopen_arg_t	zfop_openarg;
169 	zrename_arg_t	zfop_renamearg;
170 } zfoparg_t;
171 
/*
 * Argument handed to a zone file callback (zfcb_t) by dlmgmt_zfop().
 */
typedef struct zfcbarg {
	boolean_t	zfarg_inglobalzone; /* is callback in global zone? */
	/*
	 * Although declared as a zoneid_t, dlmgmt_zfop() stores a boolean
	 * here (the result of "zoneid == GLOBAL_ZONEID"), and the callbacks
	 * compare it directly against zfarg_inglobalzone.
	 */
	zoneid_t	zfarg_finglobalzone; /* is file in global zone? */
	const char	*zfarg_filename;    /* file to operate on */
	zfoparg_t	*zfarg_oparg;	    /* operation-specific argument */
} zfarg_t;
/* Shorthand for the union members reached through zfarg_oparg. */
#define	zfarg_openarg	zfarg_oparg->zfop_openarg
#define	zfarg_renamearg	zfarg_oparg->zfop_renamearg
180 
181 /* zone file callback */
182 typedef int zfcb_t(zfarg_t *);
183 
184 /*
185  * Execute an operation on filename relative to zoneid's zone root.  If the
186  * file is in the global zone, then the zfcb() callback will simply be called
187  * directly.  If the file is in a non-global zone, then zfcb() will be called
188  * both from the global zone's context, and from the non-global zone's context
189  * (from a fork()'ed child that has entered the non-global zone).  This is
190  * done to allow the callback to communicate with itself if needed (e.g. to
191  * pass back the file descriptor of an opened file).
192  */
193 static int
194 dlmgmt_zfop(const char *filename, zoneid_t zoneid, zfcb_t *zfcb,
195     zfoparg_t *zfoparg)
196 {
197 	int		ctfd;
198 	int		err;
199 	pid_t		childpid;
200 	siginfo_t	info;
201 	zfarg_t		zfarg;
202 	ctid_t		ct;
203 
204 	if (zoneid != GLOBAL_ZONEID) {
205 		/*
206 		 * We need to access a file that isn't in the global zone.
207 		 * Accessing non-global zone files from the global zone is
208 		 * unsafe (due to symlink attacks), we'll need to fork a child
209 		 * that enters the zone in question and executes the callback
210 		 * that will operate on the file.
211 		 *
212 		 * Before we proceed with this zone tango, we need to create a
213 		 * new process contract for the child, as required by
214 		 * zone_enter().
215 		 */
216 		errno = 0;
217 		ctfd = open64("/system/contract/process/template", O_RDWR);
218 		if (ctfd == -1)
219 			return (errno);
220 		if ((err = ct_tmpl_set_critical(ctfd, 0)) != 0 ||
221 		    (err = ct_tmpl_set_informative(ctfd, 0)) != 0 ||
222 		    (err = ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR)) != 0 ||
223 		    (err = ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY)) != 0 ||
224 		    (err = ct_tmpl_activate(ctfd)) != 0) {
225 			(void) close(ctfd);
226 			return (err);
227 		}
228 		childpid = fork();
229 		switch (childpid) {
230 		case -1:
231 			(void) ct_tmpl_clear(ctfd);
232 			(void) close(ctfd);
233 			return (err);
234 		case 0:
235 			(void) ct_tmpl_clear(ctfd);
236 			(void) close(ctfd);
237 			/*
238 			 * Elevate our privileges as zone_enter() requires all
239 			 * privileges.
240 			 */
241 			if ((err = dlmgmt_elevate_privileges()) != 0)
242 				_exit(err);
243 			if (zone_enter(zoneid) == -1)
244 				_exit(errno);
245 			if ((err = dlmgmt_drop_privileges()) != 0)
246 				_exit(err);
247 			break;
248 		default:
249 			if (contract_latest(&ct) == -1)
250 				ct = -1;
251 			(void) ct_tmpl_clear(ctfd);
252 			(void) close(ctfd);
253 			if (waitid(P_PID, childpid, &info, WEXITED) == -1) {
254 				(void) contract_abandon_id(ct);
255 				return (errno);
256 			}
257 			(void) contract_abandon_id(ct);
258 			if (info.si_status != 0)
259 				return (info.si_status);
260 		}
261 	}
262 
263 	zfarg.zfarg_inglobalzone = (zoneid == GLOBAL_ZONEID || childpid != 0);
264 	zfarg.zfarg_finglobalzone = (zoneid == GLOBAL_ZONEID);
265 	zfarg.zfarg_filename = filename;
266 	zfarg.zfarg_oparg = zfoparg;
267 	err = zfcb(&zfarg);
268 	if (!zfarg.zfarg_inglobalzone)
269 		_exit(err);
270 	return (err);
271 }
272 
273 static int
274 dlmgmt_zopen_cb(zfarg_t *zfarg)
275 {
276 	struct strrecvfd recvfd;
277 	boolean_t	newfile = B_FALSE;
278 	boolean_t	inglobalzone = zfarg->zfarg_inglobalzone;
279 	zoneid_t	finglobalzone = zfarg->zfarg_finglobalzone;
280 	const char	*filename = zfarg->zfarg_filename;
281 	const char	*modestr = zfarg->zfarg_openarg.zopen_modestr;
282 	int		*p = zfarg->zfarg_openarg.zopen_pipe;
283 	struct stat	statbuf;
284 	int		oflags;
285 	mode_t		mode;
286 	int		fd = -1;
287 	int		err;
288 
289 	/* We only ever open a file for reading or writing, not both. */
290 	oflags = (modestr[0] == 'r') ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC;
291 	mode = (modestr[0] == 'r') ? 0 : S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
292 
293 	/* Open the file if we're in the same zone as the file. */
294 	if (inglobalzone == finglobalzone) {
295 		/*
296 		 * First determine if we will be creating the file as part of
297 		 * opening it.  If so, then we'll need to ensure that it has
298 		 * the proper ownership after having opened it.
299 		 */
300 		if (oflags & O_CREAT) {
301 			if (stat(filename, &statbuf) == -1) {
302 				if (errno == ENOENT)
303 					newfile = B_TRUE;
304 				else
305 					return (errno);
306 			}
307 		}
308 		if ((fd = open(filename, oflags, mode)) == -1)
309 			return (errno);
310 		if (newfile) {
311 			if (chown(filename, UID_DLADM, GID_NETADM) == -1) {
312 				err = errno;
313 				(void) close(fd);
314 				return (err);
315 			}
316 		}
317 	}
318 
319 	/*
320 	 * If we're not in the global zone, send the file-descriptor back to
321 	 * our parent in the global zone.
322 	 */
323 	if (!inglobalzone) {
324 		assert(!finglobalzone);
325 		assert(fd != -1);
326 		return (ioctl(p[1], I_SENDFD, fd) == -1 ? errno : 0);
327 	}
328 
329 	/*
330 	 * At this point, we know we're in the global zone.  If the file was
331 	 * in a non-global zone, receive the file-descriptor from our child in
332 	 * the non-global zone.
333 	 */
334 	if (!finglobalzone) {
335 		if (ioctl(p[0], I_RECVFD, &recvfd) == -1)
336 			return (errno);
337 		fd = recvfd.fd;
338 	}
339 
340 	zfarg->zfarg_openarg.zopen_fd = fd;
341 	return (0);
342 }
343 
344 static int
345 dlmgmt_zunlink_cb(zfarg_t *zfarg)
346 {
347 	if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
348 		return (0);
349 	return (unlink(zfarg->zfarg_filename) == 0 ? 0 : errno);
350 }
351 
352 static int
353 dlmgmt_zrename_cb(zfarg_t *zfarg)
354 {
355 	if (zfarg->zfarg_inglobalzone != zfarg->zfarg_finglobalzone)
356 		return (0);
357 	return (rename(zfarg->zfarg_filename,
358 	    zfarg->zfarg_renamearg.zrename_newname) == 0 ? 0 : errno);
359 }
360 
361 /*
362  * Same as fopen(3C), except that it opens the file relative to zoneid's zone
363  * root.
364  */
365 static FILE *
366 dlmgmt_zfopen(const char *filename, const char *modestr, zoneid_t zoneid,
367     int *err)
368 {
369 	int		p[2];
370 	zfoparg_t	zfoparg;
371 	FILE		*fp = NULL;
372 
373 	if (zoneid != GLOBAL_ZONEID && pipe(p) == -1) {
374 		*err = errno;
375 		return (NULL);
376 	}
377 
378 	zfoparg.zfop_openarg.zopen_modestr = modestr;
379 	zfoparg.zfop_openarg.zopen_pipe = p;
380 	*err = dlmgmt_zfop(filename, zoneid, dlmgmt_zopen_cb, &zfoparg);
381 	if (zoneid != GLOBAL_ZONEID) {
382 		(void) close(p[0]);
383 		(void) close(p[1]);
384 	}
385 	if (*err == 0) {
386 		fp = fdopen(zfoparg.zfop_openarg.zopen_fd, modestr);
387 		if (fp == NULL) {
388 			*err = errno;
389 			(void) close(zfoparg.zfop_openarg.zopen_fd);
390 		}
391 	}
392 	return (fp);
393 }
394 
395 /*
396  * Same as rename(2), except that old and new are relative to zoneid's zone
397  * root.
398  */
399 static int
400 dlmgmt_zrename(const char *old, const char *new, zoneid_t zoneid)
401 {
402 	zfoparg_t zfoparg;
403 
404 	zfoparg.zfop_renamearg.zrename_newname = new;
405 	return (dlmgmt_zfop(old, zoneid, dlmgmt_zrename_cb, &zfoparg));
406 }
407 
408 /*
409  * Same as unlink(2), except that filename is relative to zoneid's zone root.
410  */
411 static int
412 dlmgmt_zunlink(const char *filename, zoneid_t zoneid)
413 {
414 	return (dlmgmt_zfop(filename, zoneid, dlmgmt_zunlink_cb, NULL));
415 }
416 
417 static size_t
418 write_str(char *buffer, size_t buffer_length, char *name, void *value)
419 {
420 	char	*ptr = value;
421 	size_t	data_length = strnlen(ptr, buffer_length);
422 
423 	/*
424 	 * Strings are assumed to be NULL terminated.  In order to fit in
425 	 * the buffer, the string's length must be less then buffer_length.
426 	 * If the value is empty, there's no point in writing it, in fact,
427 	 * we shouldn't even see that case.
428 	 */
429 	if (data_length + BASE_PROPERTY_LENGTH(DLADM_TYPE_STR, name) ==
430 	    buffer_length || data_length == 0)
431 		return (0);
432 
433 	/*
434 	 * Since we know the string will fit in the buffer, snprintf will
435 	 * always return less than buffer_length, so we can just return
436 	 * whatever snprintf returns.
437 	 */
438 	return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%s",
439 	    name, DLADM_TYPE_STR, ptr));
440 }
441 
442 static size_t
443 write_boolean(char *buffer, size_t buffer_length, char *name, void *value)
444 {
445 	boolean_t	*ptr = value;
446 
447 	/*
448 	 * Booleans are either zero or one, so we only need room for two
449 	 * characters in the buffer.
450 	 */
451 	if (buffer_length <= 1 + BASE_PROPERTY_LENGTH(DLADM_TYPE_BOOLEAN, name))
452 		return (0);
453 
454 	return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%d",
455 	    name, DLADM_TYPE_BOOLEAN, *ptr));
456 }
457 
458 static size_t
459 write_uint64(char *buffer, size_t buffer_length, char *name, void *value)
460 {
461 	uint64_t	*ptr = value;
462 
463 	/*
464 	 * Limit checking for uint64_t is a little trickier.
465 	 */
466 	if (snprintf(NULL, 0, "%lld", *ptr)  +
467 	    BASE_PROPERTY_LENGTH(DLADM_TYPE_UINT64, name) >= buffer_length)
468 		return (0);
469 
470 	return (GENERATE_PROPERTY_STRING(buffer, buffer_length, "%lld",
471 	    name, DLADM_TYPE_UINT64, *ptr));
472 }
473 
474 static size_t
475 read_str(char *buffer, void **value)
476 {
477 	char		*ptr = calloc(MAXLINKATTRVALLEN, sizeof (char));
478 	ssize_t		len;
479 
480 	if (ptr == NULL || (len = strlcpy(ptr, buffer, MAXLINKATTRVALLEN))
481 	    >= MAXLINKATTRVALLEN) {
482 		free(ptr);
483 		return (0);
484 	}
485 
486 	*(char **)value = ptr;
487 
488 	/* Account for NULL terminator */
489 	return (len + 1);
490 }
491 
492 static size_t
493 read_boolean(char *buffer, void **value)
494 {
495 	boolean_t	*ptr = calloc(1, sizeof (boolean_t));
496 
497 	if (ptr == NULL)
498 		return (0);
499 
500 	*ptr = atoi(buffer);
501 	*(boolean_t **)value = ptr;
502 
503 	return (sizeof (boolean_t));
504 }
505 
/*
 * Convert the decimal number at the start of buffer into a newly allocated
 * int64_t returned through *value.  The caller owns and must free the
 * allocation.  Returns sizeof (int64_t), or 0 if memory could not be
 * allocated.
 */
static size_t
read_int64(char *buffer, void **value)
{
	int64_t	*nump;

	nump = calloc(1, sizeof (*nump));
	if (nump == NULL)
		return (0);

	*nump = (int64_t)strtoll(buffer, NULL, 10);
	*(int64_t **)value = nump;

	return (sizeof (int64_t));
}
519 
520 static dlmgmt_db_req_t *
521 dlmgmt_db_req_alloc(dlmgmt_db_op_t op, const char *linkname,
522     datalink_id_t linkid, zoneid_t zoneid, uint32_t flags, int *err)
523 {
524 	dlmgmt_db_req_t *req;
525 
526 	if ((req = calloc(1, sizeof (dlmgmt_db_req_t))) == NULL) {
527 		*err = errno;
528 	} else {
529 		req->ls_op = op;
530 		if (linkname != NULL)
531 			(void) strlcpy(req->ls_link, linkname, MAXLINKNAMELEN);
532 		req->ls_linkid = linkid;
533 		req->ls_zoneid = zoneid;
534 		req->ls_flags = flags;
535 	}
536 	return (req);
537 }
538 
539 /*
540  * Update the db entry with name "entryname" using information from "linkp".
541  */
542 static int
543 dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp,
544     uint32_t flags)
545 {
546 	dlmgmt_db_req_t	*req;
547 	int		err;
548 
549 	/* It is either a persistent request or an active request, not both. */
550 	assert((flags == DLMGMT_PERSIST) || (flags == DLMGMT_ACTIVE));
551 
552 	if ((req = dlmgmt_db_req_alloc(op, entryname, linkp->ll_linkid,
553 	    linkp->ll_zoneid, flags, &err)) == NULL) {
554 		return (err);
555 	}
556 
557 	/*
558 	 * If this is a transient link, then use the global zone cache file.
559 	 * This is in order to allow recovery from a dlmgmtd failure that
560 	 * leaves a zone in a 'down' state. In that state it is not possible
561 	 * to read the zone's cache file (since it is always done from a sub
562 	 * process running in the zone's context). As a result, datalinks would
563 	 * otherwise remain stuck in the zone.
564 	 */
565 	if (flags == DLMGMT_ACTIVE && linkp->ll_transient)
566 		req->ls_zoneid = GLOBAL_ZONEID;
567 
568 	/*
569 	 * If the return error is EINPROGRESS, this request is handled
570 	 * asynchronously; return success.
571 	 */
572 	err = dlmgmt_process_db_req(req);
573 	if (err != EINPROGRESS)
574 		free(req);
575 	else
576 		err = 0;
577 	return (err);
578 }
579 
580 #define	DLMGMT_DB_OP_STR(op)					\
581 	(((op) == DLMGMT_DB_OP_READ) ? "read" :			\
582 	(((op) == DLMGMT_DB_OP_WRITE) ? "write" : "delete"))
583 
584 #define	DLMGMT_DB_CONF_STR(flag)				\
585 	(((flag) == DLMGMT_ACTIVE) ? "active" :			\
586 	(((flag) == DLMGMT_PERSIST) ? "persistent" : ""))
587 
588 static int
589 dlmgmt_process_db_req(dlmgmt_db_req_t *req)
590 {
591 	pthread_t	tid;
592 	boolean_t	writeop;
593 	int		err;
594 
595 	/*
596 	 * If there are already pending "write" requests, queue this request in
597 	 * the pending list.  Note that this function is called while the
598 	 * dlmgmt_rw_lock is held, so it is safe to access the global variables.
599 	 */
600 	writeop = (req->ls_op != DLMGMT_DB_OP_READ);
601 	if (writeop && (req->ls_flags == DLMGMT_PERSIST) &&
602 	    (dlmgmt_db_req_head != NULL)) {
603 		dlmgmt_db_req_tail->ls_next = req;
604 		dlmgmt_db_req_tail = req;
605 		return (EINPROGRESS);
606 	}
607 
608 	err = dlmgmt_process_db_onereq(req, writeop);
609 	if (err != EINPROGRESS && err != 0 && err != ENOENT) {
610 		/*
611 		 * Log the error unless the request processing is still in
612 		 * progress or if the configuration file hasn't been created
613 		 * yet (ENOENT).
614 		 */
615 		dlmgmt_log(LOG_WARNING, "dlmgmt_process_db_onereq() %s "
616 		    "operation on %s configuration failed: %s",
617 		    DLMGMT_DB_OP_STR(req->ls_op),
618 		    DLMGMT_DB_CONF_STR(req->ls_flags), strerror(err));
619 	}
620 
621 	if (err == EINPROGRESS) {
622 		assert(req->ls_flags == DLMGMT_PERSIST);
623 		assert(writeop && dlmgmt_db_req_head == NULL);
624 		dlmgmt_db_req_tail = dlmgmt_db_req_head = req;
625 		err = pthread_create(&tid, NULL, dlmgmt_db_update_thread, NULL);
626 		if (err == 0)
627 			return (EINPROGRESS);
628 	}
629 	return (err);
630 }
631 
632 static int
633 dlmgmt_process_db_onereq(dlmgmt_db_req_t *req, boolean_t writeop)
634 {
635 	int	err = 0;
636 	FILE	*fp, *nfp = NULL;
637 	char	file[MAXPATHLEN];
638 	char	newfile[MAXPATHLEN];
639 
640 	DLMGMT_MAKE_FILE_DB_PATH(file, (req->ls_flags == DLMGMT_PERSIST));
641 	fp = dlmgmt_zfopen(file, "r", req->ls_zoneid, &err);
642 	/*
643 	 * Note that it is not an error if the file doesn't exist.  If we're
644 	 * reading, we treat this case the same way as an empty file.  If
645 	 * we're writing, the file will be created when we open the file for
646 	 * writing below.
647 	 */
648 	if (fp == NULL && !writeop)
649 		return (err);
650 
651 	if (writeop) {
652 		(void) snprintf(newfile, MAXPATHLEN, "%s.new", file);
653 		nfp = dlmgmt_zfopen(newfile, "w", req->ls_zoneid, &err);
654 		if (nfp == NULL) {
655 			/*
656 			 * EROFS can happen at boot when the file system is
657 			 * read-only.  Return EINPROGRESS so that the caller
658 			 * can add this request to the pending request list
659 			 * and start a retry thread.
660 			 */
661 			err = (errno == EROFS ? EINPROGRESS : errno);
662 			goto done;
663 		}
664 	}
665 	if (writeop) {
666 		if ((err = process_db_write(req, fp, nfp)) == 0)
667 			err = dlmgmt_zrename(newfile, file, req->ls_zoneid);
668 	} else {
669 		err = process_db_read(req, fp);
670 	}
671 
672 done:
673 	if (nfp != NULL) {
674 		(void) fclose(nfp);
675 		if (err != 0)
676 			(void) dlmgmt_zunlink(newfile, req->ls_zoneid);
677 	}
678 	(void) fclose(fp);
679 	return (err);
680 }
681 
682 /*ARGSUSED*/
683 static void *
684 dlmgmt_db_update_thread(void *arg)
685 {
686 	dlmgmt_db_req_t	*req;
687 
688 	dlmgmt_table_lock(B_TRUE);
689 
690 	assert(dlmgmt_db_req_head != NULL);
691 	while ((req = dlmgmt_db_req_head) != NULL) {
692 		assert(req->ls_flags == DLMGMT_PERSIST);
693 		if (dlmgmt_process_db_onereq(req, B_TRUE) == EINPROGRESS) {
694 			/*
695 			 * The filesystem is still read only. Go to sleep and
696 			 * try again.
697 			 */
698 			dlmgmt_table_unlock();
699 			(void) sleep(5);
700 			dlmgmt_table_lock(B_TRUE);
701 			continue;
702 		}
703 
704 		/*
705 		 * The filesystem is no longer read only. Continue processing
706 		 * and remove the request from the pending list.
707 		 */
708 		dlmgmt_db_req_head = req->ls_next;
709 		if (dlmgmt_db_req_tail == req) {
710 			assert(dlmgmt_db_req_head == NULL);
711 			dlmgmt_db_req_tail = NULL;
712 		}
713 		free(req);
714 	}
715 
716 	dlmgmt_table_unlock();
717 	return (NULL);
718 }
719 
720 static int
721 parse_linkprops(char *buf, dlmgmt_link_t *linkp)
722 {
723 	boolean_t		found_type = B_FALSE;
724 	dladm_datatype_t	type = DLADM_TYPE_STR;
725 	int			i, len;
726 	char			*curr;
727 	char			attr_name[MAXLINKATTRLEN];
728 	size_t			attr_buf_len = 0;
729 	void			*attr_buf = NULL;
730 
731 	curr = buf;
732 	len = strlen(buf);
733 	attr_name[0] = '\0';
734 	for (i = 0; i < len; i++) {
735 		char		c = buf[i];
736 		boolean_t	match = (c == '=' ||
737 		    (c == ',' && !found_type) || c == ';');
738 		boolean_t	rename = B_FALSE;
739 
740 		/*
741 		 * Move to the next character if there is no match and
742 		 * if we have not reached the last character.
743 		 */
744 		if (!match && i != len - 1)
745 			continue;
746 
747 		if (match) {
748 			/*
749 			 * NUL-terminate the string pointed to by 'curr'.
750 			 */
751 			buf[i] = '\0';
752 			if (*curr == '\0')
753 				goto parse_fail;
754 		}
755 
756 		if (attr_name[0] != '\0' && found_type) {
757 			/*
758 			 * We get here after we have processed the "<prop>="
759 			 * pattern. The pattern we are now interested in is
760 			 * "<val>;".
761 			 */
762 			if (c == '=')
763 				goto parse_fail;
764 
765 			if (strcmp(attr_name, "linkid") == 0) {
766 				if (read_int64(curr, &attr_buf) == 0)
767 					goto parse_fail;
768 				linkp->ll_linkid =
769 				    (datalink_class_t)*(int64_t *)attr_buf;
770 			} else if (strcmp(attr_name, "name") == 0) {
771 				if (read_str(curr, &attr_buf) == 0)
772 					goto parse_fail;
773 				(void) snprintf(linkp->ll_link,
774 				    MAXLINKNAMELEN, "%s", attr_buf);
775 			} else if (strcmp(attr_name, "class") == 0) {
776 				if (read_int64(curr, &attr_buf) == 0)
777 					goto parse_fail;
778 				linkp->ll_class =
779 				    (datalink_class_t)*(int64_t *)attr_buf;
780 			} else if (strcmp(attr_name, "media") == 0) {
781 				if (read_int64(curr, &attr_buf) == 0)
782 					goto parse_fail;
783 				linkp->ll_media =
784 				    (uint32_t)*(int64_t *)attr_buf;
785 			} else if (strcmp(attr_name, "zone") == 0) {
786 				if (read_str(curr, &attr_buf) == 0)
787 					goto parse_fail;
788 				linkp->ll_zoneid = getzoneidbyname(attr_buf);
789 				if (linkp->ll_zoneid == -1) {
790 					if (errno == EFAULT)
791 						abort();
792 					/*
793 					 * If we can't find the zone, assign the
794 					 * link to the GZ and mark it for being
795 					 * renamed.
796 					 */
797 					linkp->ll_zoneid = 0;
798 					rename = B_TRUE;
799 				}
800 			} else if (strcmp(attr_name, "transient") == 0) {
801 				if (read_boolean(curr, &attr_buf) == 0)
802 					goto parse_fail;
803 				linkp->ll_transient = *(boolean_t *)attr_buf;
804 			} else {
805 				attr_buf_len = translators[type].read_func(curr,
806 				    &attr_buf);
807 				if (attr_buf_len == 0)
808 					goto parse_fail;
809 
810 				if (linkattr_set(&(linkp->ll_head), attr_name,
811 				    attr_buf, attr_buf_len, type) != 0) {
812 					free(attr_buf);
813 					goto parse_fail;
814 				}
815 			}
816 
817 			free(attr_buf);
818 			attr_name[0] = '\0';
819 			found_type = B_FALSE;
820 		} else if (attr_name[0] != '\0') {
821 			/*
822 			 * Non-zero length attr_name and found_type of false
823 			 * indicates that we have not found the type for this
824 			 * attribute.  The pattern now is "<type>,<val>;", we
825 			 * want the <type> part of the pattern.
826 			 */
827 			for (type = 0; type < ntranslators; type++) {
828 				if (strcmp(curr,
829 				    translators[type].type_name) == 0) {
830 					found_type = B_TRUE;
831 					break;
832 				}
833 			}
834 
835 			if (!found_type)
836 				goto parse_fail;
837 		} else {
838 			/*
839 			 * A zero length attr_name indicates we are looking
840 			 * at the beginning of a link attribute.
841 			 */
842 			if (c != '=')
843 				goto parse_fail;
844 
845 			(void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
846 		}
847 
848 		/*
849 		 * The zone that this link belongs to has died, we are
850 		 * reparenting it to the GZ and renaming it to avoid name
851 		 * collisions.
852 		 */
853 		if (rename) {
854 			(void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
855 			    "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
856 		}
857 
858 		curr = buf + i + 1;
859 	}
860 
861 	/* Correct any erroneous IPTUN datalink class constant in the file */
862 	if (linkp->ll_class == 0x60) {
863 		linkp->ll_class = DATALINK_CLASS_IPTUN;
864 		rewrite_needed = B_TRUE;
865 	}
866 
867 	return (0);
868 
869 parse_fail:
870 	/*
871 	 * Free linkp->ll_head (link attribute list)
872 	 */
873 	linkattr_destroy(linkp);
874 	return (-1);
875 }
876 
877 static boolean_t
878 process_link_line(char *buf, dlmgmt_link_t *linkp)
879 {
880 	int	i, len, llen;
881 	char	*str, *lasts;
882 	char	tmpbuf[MAXLINELEN];
883 
884 	bzero(linkp, sizeof (*linkp));
885 	linkp->ll_linkid = DATALINK_INVALID_LINKID;
886 	linkp->ll_zoneid = ALL_ZONES;
887 
888 	/*
889 	 * Use a copy of buf for parsing so that we can do whatever we want.
890 	 */
891 	(void) strlcpy(tmpbuf, buf, MAXLINELEN);
892 
893 	/*
894 	 * Skip leading spaces, blank lines, and comments.
895 	 */
896 	len = strlen(tmpbuf);
897 	for (i = 0; i < len; i++) {
898 		if (!isspace(tmpbuf[i]))
899 			break;
900 	}
901 	if (i == len || tmpbuf[i] == '#')
902 		return (B_TRUE);
903 
904 	str = tmpbuf + i;
905 	/*
906 	 * Find the link name and assign it to the link structure.
907 	 */
908 	if (strtok_r(str, " \n\t", &lasts) == NULL)
909 		goto fail;
910 
911 	llen = strlen(str);
912 	/*
913 	 * Note that a previous version of the persistent datalink.conf file
914 	 * stored the linkid as the first field.  In that case, the name will
915 	 * be obtained through parse_linkprops from a property with the format
916 	 * "name=<linkname>".  If we encounter such a format, we set
917 	 * rewrite_needed so that dlmgmt_db_init() can rewrite the file with
918 	 * the new format after it's done reading in the data.
919 	 */
920 	if (isdigit(str[0])) {
921 		linkp->ll_linkid = atoi(str);
922 		rewrite_needed = B_TRUE;
923 	} else {
924 		if (strlcpy(linkp->ll_link, str, sizeof (linkp->ll_link)) >=
925 		    sizeof (linkp->ll_link))
926 			goto fail;
927 	}
928 
929 	str += llen + 1;
930 	if (str >= tmpbuf + len)
931 		goto fail;
932 
933 	/*
934 	 * Now find the list of link properties.
935 	 */
936 	if ((str = strtok_r(str, " \n\t", &lasts)) == NULL)
937 		goto fail;
938 
939 	if (parse_linkprops(str, linkp) < 0)
940 		goto fail;
941 
942 	return (B_TRUE);
943 
944 fail:
945 	/*
946 	 * Delete corrupted line.
947 	 */
948 	buf[0] = '\0';
949 	return (B_FALSE);
950 }
951 
952 /*
953  * Find any properties in linkp that refer to "old", and rename to "new".
954  * Return B_TRUE if any renaming occurred.
955  */
956 static int
957 dlmgmt_attr_rename(dlmgmt_link_t *linkp, const char *old, const char *new,
958     boolean_t *renamed)
959 {
960 	dlmgmt_linkattr_t	*attrp;
961 	char			*newval = NULL, *pname;
962 	char			valcp[MAXLINKATTRVALLEN];
963 	size_t			newsize;
964 
965 	*renamed = B_FALSE;
966 
967 	if ((attrp = linkattr_find(linkp->ll_head, "linkover")) != NULL ||
968 	    (attrp = linkattr_find(linkp->ll_head, "simnetpeer")) != NULL) {
969 		if (strcmp(old, (char *)attrp->lp_val) == 0) {
970 			newsize = strlen(new) + 1;
971 			if ((newval = malloc(newsize)) == NULL)
972 				return (errno);
973 			(void) strcpy(newval, new);
974 			free(attrp->lp_val);
975 			attrp->lp_val = newval;
976 			attrp->lp_sz = newsize;
977 			*renamed = B_TRUE;
978 		}
979 		return (0);
980 	}
981 
982 	if ((attrp = linkattr_find(linkp->ll_head, "portnames")) == NULL)
983 		return (0);
984 
985 	/* <linkname>:[<linkname>:]... */
986 	if ((newval = calloc(MAXLINKATTRVALLEN, sizeof (char))) == NULL)
987 		return (errno);
988 
989 	bcopy(attrp->lp_val, valcp, sizeof (valcp));
990 	pname = strtok(valcp, ":");
991 	while (pname != NULL) {
992 		if (strcmp(pname, old) == 0) {
993 			(void) strcat(newval, new);
994 			*renamed = B_TRUE;
995 		} else {
996 			(void) strcat(newval, pname);
997 		}
998 		(void) strcat(newval, ":");
999 		pname = strtok(NULL, ":");
1000 	}
1001 	if (*renamed) {
1002 		free(attrp->lp_val);
1003 		attrp->lp_val = newval;
1004 		attrp->lp_sz = strlen(newval) + 1;
1005 	} else {
1006 		free(newval);
1007 	}
1008 	return (0);
1009 }
1010 
1011 static int
1012 process_db_write(dlmgmt_db_req_t *req, FILE *fp, FILE *nfp)
1013 {
1014 	boolean_t		done = B_FALSE;
1015 	int			err = 0;
1016 	dlmgmt_link_t		link_in_file, *linkp = NULL, *dblinkp;
1017 	boolean_t		persist = (req->ls_flags == DLMGMT_PERSIST);
1018 	boolean_t		writeall, rename, attr_renamed;
1019 	char			buf[MAXLINELEN];
1020 
1021 	writeall = (req->ls_linkid == DATALINK_ALL_LINKID);
1022 
1023 	if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall) {
1024 		/*
1025 		 * find the link in the avl tree with the given linkid.
1026 		 */
1027 		linkp = link_by_id(req->ls_linkid, req->ls_zoneid);
1028 		if (linkp == NULL || (linkp->ll_flags & req->ls_flags) == 0) {
1029 			/*
1030 			 * This link has already been changed. This could
1031 			 * happen if the request is pending because of
1032 			 * read-only file-system. If so, we are done.
1033 			 */
1034 			return (0);
1035 		}
1036 		/*
1037 		 * In the case of a rename, linkp's name has been updated to
1038 		 * the new name, and req->ls_link is the old link name.
1039 		 */
1040 		rename = (strcmp(req->ls_link, linkp->ll_link) != 0);
1041 	}
1042 
1043 	/*
1044 	 * fp can be NULL if the file didn't initially exist and we're
1045 	 * creating it as part of this write operation.
1046 	 */
1047 	if (fp == NULL)
1048 		goto write;
1049 
1050 	while (err == 0 && fgets(buf, sizeof (buf), fp) != NULL &&
1051 	    process_link_line(buf, &link_in_file)) {
1052 		/*
1053 		 * Only the link name is needed. Free the memory allocated for
1054 		 * the link attributes list of link_in_file.
1055 		 */
1056 		linkattr_destroy(&link_in_file);
1057 
1058 		if (link_in_file.ll_link[0] == '\0' || done) {
1059 			/*
1060 			 * this is a comment line or we are done updating the
1061 			 * line for the specified link, write the rest of
1062 			 * lines out.
1063 			 */
1064 			if (fputs(buf, nfp) == EOF)
1065 				err = errno;
1066 			continue;
1067 		}
1068 
1069 		switch (req->ls_op) {
1070 		case DLMGMT_DB_OP_WRITE:
1071 			/*
1072 			 * For write operations, we generate a new output line
1073 			 * if we're either writing all links (writeall) or if
1074 			 * the name of the link in the file matches the one
1075 			 * we're looking for.  Otherwise, we write out the
1076 			 * buffer as-is.
1077 			 *
1078 			 * If we're doing a rename operation, ensure that any
1079 			 * references to the link being renamed in link
1080 			 * properties are also updated before we write
1081 			 * anything.
1082 			 */
1083 			if (writeall) {
1084 				linkp = link_by_name(link_in_file.ll_link,
1085 				    req->ls_zoneid);
1086 			}
1087 			if (writeall || strcmp(req->ls_link,
1088 			    link_in_file.ll_link) == 0) {
1089 				generate_link_line(linkp, persist, buf);
1090 				if (!writeall && !rename)
1091 					done = B_TRUE;
1092 			} else if (rename && persist) {
1093 				dblinkp = link_by_name(link_in_file.ll_link,
1094 				    req->ls_zoneid);
1095 				err = dlmgmt_attr_rename(dblinkp, req->ls_link,
1096 				    linkp->ll_link, &attr_renamed);
1097 				if (err != 0)
1098 					break;
1099 				if (attr_renamed) {
1100 					generate_link_line(dblinkp, persist,
1101 					    buf);
1102 				}
1103 			}
1104 			if (fputs(buf, nfp) == EOF)
1105 				err = errno;
1106 			break;
1107 		case DLMGMT_DB_OP_DELETE:
1108 			/*
1109 			 * Delete is simple.  If buf does not represent the
1110 			 * link we're deleting, write it out.
1111 			 */
1112 			if (strcmp(req->ls_link, link_in_file.ll_link) != 0) {
1113 				if (fputs(buf, nfp) == EOF)
1114 					err = errno;
1115 			} else {
1116 				done = B_TRUE;
1117 			}
1118 			break;
1119 		case DLMGMT_DB_OP_READ:
1120 		default:
1121 			err = EINVAL;
1122 			break;
1123 		}
1124 	}
1125 
1126 write:
1127 	/*
1128 	 * If we get to the end of the file and have not seen what linkid
1129 	 * points to, write it out then.
1130 	 */
1131 	if (req->ls_op == DLMGMT_DB_OP_WRITE && !writeall && !rename && !done) {
1132 		generate_link_line(linkp, persist, buf);
1133 		done = B_TRUE;
1134 		if (fputs(buf, nfp) == EOF)
1135 			err = errno;
1136 	}
1137 
1138 	return (err);
1139 }
1140 
1141 static int
1142 process_db_read(dlmgmt_db_req_t *req, FILE *fp)
1143 {
1144 	avl_index_t	name_where, id_where;
1145 	dlmgmt_link_t	link_in_file, *newlink, *link_in_db;
1146 	char		buf[MAXLINELEN];
1147 	int		err = 0;
1148 
1149 	/*
1150 	 * This loop processes each line of the configuration file.
1151 	 */
1152 	while (fgets(buf, MAXLINELEN, fp) != NULL) {
1153 		if (!process_link_line(buf, &link_in_file)) {
1154 			err = EINVAL;
1155 			break;
1156 		}
1157 
1158 		/*
1159 		 * Skip the comment line.
1160 		 */
1161 		if (link_in_file.ll_link[0] == '\0') {
1162 			linkattr_destroy(&link_in_file);
1163 			continue;
1164 		}
1165 
1166 		if ((req->ls_flags & DLMGMT_ACTIVE) &&
1167 		    link_in_file.ll_linkid == DATALINK_INVALID_LINKID) {
1168 			linkattr_destroy(&link_in_file);
1169 			continue;
1170 		}
1171 
1172 		/*
1173 		 * Persistent configuration files do not include the "zone"
1174 		 * attribute. In that case, ll_zoneid will have the the
1175 		 * ALL_ZONES sentinel value. Adjust it here to the requesting
1176 		 * zone's ID.
1177 		 */
1178 		if (link_in_file.ll_zoneid == ALL_ZONES)
1179 			link_in_file.ll_zoneid = req->ls_zoneid;
1180 
1181 		assert(req->ls_zoneid == 0 ||
1182 		    link_in_file.ll_zoneid == req->ls_zoneid);
1183 		link_in_db = link_by_name(link_in_file.ll_link,
1184 		    link_in_file.ll_zoneid);
1185 		if (link_in_db != NULL) {
1186 			/*
1187 			 * If the link in the database already has the flag
1188 			 * for this request set, then the entry is a
1189 			 * duplicate.  If it's not a duplicate, then simply
1190 			 * turn on the appropriate flag on the existing link.
1191 			 */
1192 			if (link_in_db->ll_flags & req->ls_flags) {
1193 				dlmgmt_log(LOG_WARNING, "Duplicate links "
1194 				    "in the repository: %s",
1195 				    link_in_file.ll_link);
1196 				linkattr_destroy(&link_in_file);
1197 			} else {
1198 				if (req->ls_flags & DLMGMT_PERSIST) {
1199 					/*
1200 					 * Save the newly read properties into
1201 					 * the existing link.
1202 					 */
1203 					assert(link_in_db->ll_head == NULL);
1204 					link_in_db->ll_head =
1205 					    link_in_file.ll_head;
1206 				} else {
1207 					linkattr_destroy(&link_in_file);
1208 				}
1209 				link_in_db->ll_flags |= req->ls_flags;
1210 			}
1211 		} else {
1212 			/*
1213 			 * This is a new link.  Allocate a new dlmgmt_link_t
1214 			 * and add it to the trees.
1215 			 */
1216 			newlink = calloc(1, sizeof (*newlink));
1217 			if (newlink == NULL) {
1218 				dlmgmt_log(LOG_WARNING, "Unable to allocate "
1219 				    "memory to create new link %s",
1220 				    link_in_file.ll_link);
1221 				linkattr_destroy(&link_in_file);
1222 				continue;
1223 			}
1224 			bcopy(&link_in_file, newlink, sizeof (*newlink));
1225 
1226 			if (newlink->ll_linkid == DATALINK_INVALID_LINKID)
1227 				newlink->ll_linkid = dlmgmt_nextlinkid;
1228 			if (avl_find(&dlmgmt_id_avl, newlink, &id_where) !=
1229 			    NULL) {
1230 				dlmgmt_log(LOG_WARNING, "Link ID %d is already"
1231 				    " in use, destroying link %s",
1232 				    newlink->ll_linkid, newlink->ll_link);
1233 				link_destroy(newlink);
1234 				continue;
1235 			}
1236 
1237 			if ((req->ls_flags & DLMGMT_ACTIVE) &&
1238 			    link_activate(newlink) != 0) {
1239 				dlmgmt_log(LOG_WARNING, "Unable to activate %s",
1240 				    newlink->ll_link);
1241 				link_destroy(newlink);
1242 				continue;
1243 			}
1244 
1245 			avl_insert(&dlmgmt_id_avl, newlink, id_where);
1246 			/*
1247 			 * link_activate call above can insert newlink in
1248 			 * dlmgmt_name_avl tree when activating a link that is
1249 			 * assigned to a NGZ.
1250 			 */
1251 			if (avl_find(&dlmgmt_name_avl, newlink,
1252 			    &name_where) == NULL)
1253 				avl_insert(&dlmgmt_name_avl, newlink,
1254 				    name_where);
1255 
1256 			dlmgmt_advance(newlink);
1257 			newlink->ll_flags |= req->ls_flags;
1258 		}
1259 	}
1260 
1261 	return (err);
1262 }
1263 
1264 /*
1265  * Generate an entry in the link database.
1266  * Each entry has this format:
1267  * <link name>	<prop0>=<type>,<val>;...;<propn>=<type>,<val>;
1268  */
1269 static void
1270 generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
1271 {
1272 	char			tmpbuf[MAXLINELEN];
1273 	char			*ptr = tmpbuf;
1274 	char			*lim = tmpbuf + MAXLINELEN;
1275 	dlmgmt_linkattr_t	*cur_p = NULL;
1276 	uint64_t		u64;
1277 
1278 	ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
1279 	if (!persist) {
1280 		char zname[ZONENAME_MAX];
1281 
1282 		/*
1283 		 * We store the linkid and the zone name in the active database
1284 		 * so that dlmgmtd can recover in the event that it is
1285 		 * restarted.
1286 		 */
1287 		u64 = linkp->ll_linkid;
1288 		ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
1289 
1290 		if (getzonenamebyid(linkp->ll_zoneid, zname,
1291 		    sizeof (zname)) != -1) {
1292 			ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
1293 		}
1294 	}
1295 	u64 = linkp->ll_class;
1296 	ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
1297 	u64 = linkp->ll_media;
1298 	ptr += write_uint64(ptr, BUFLEN(lim, ptr), "media", &u64);
1299 
1300 	if (!persist && linkp->ll_transient) {
1301 		boolean_t b = B_TRUE;
1302 		ptr += write_boolean(ptr, BUFLEN(lim, ptr), "transient", &b);
1303 	}
1304 
1305 	/*
1306 	 * The daemon does not keep any active link attribute. Only store the
1307 	 * attributes if this request is for persistent configuration,
1308 	 */
1309 	if (persist) {
1310 		for (cur_p = linkp->ll_head; cur_p != NULL;
1311 		    cur_p = cur_p->lp_next) {
1312 			ptr += translators[cur_p->lp_type].write_func(ptr,
1313 			    BUFLEN(lim, ptr), cur_p->lp_name, cur_p->lp_val);
1314 		}
1315 	}
1316 
1317 	if (ptr <= lim)
1318 		(void) snprintf(buf, MAXLINELEN, "%s\n", tmpbuf);
1319 }
1320 
1321 int
1322 dlmgmt_delete_db_entry(dlmgmt_link_t *linkp, uint32_t flags)
1323 {
1324 	return (dlmgmt_db_update(DLMGMT_DB_OP_DELETE, linkp->ll_link, linkp,
1325 	    flags));
1326 }
1327 
1328 int
1329 dlmgmt_write_db_entry(const char *entryname, dlmgmt_link_t *linkp,
1330     uint32_t flags)
1331 {
1332 	int err;
1333 
1334 	if (flags & DLMGMT_PERSIST) {
1335 		if ((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1336 		    linkp, DLMGMT_PERSIST)) != 0) {
1337 			return (err);
1338 		}
1339 	}
1340 
1341 	if (flags & DLMGMT_ACTIVE) {
1342 		if (((err = dlmgmt_db_update(DLMGMT_DB_OP_WRITE, entryname,
1343 		    linkp, DLMGMT_ACTIVE)) != 0) && (flags & DLMGMT_PERSIST)) {
1344 			(void) dlmgmt_db_update(DLMGMT_DB_OP_DELETE, entryname,
1345 			    linkp, DLMGMT_PERSIST);
1346 			return (err);
1347 		}
1348 	}
1349 
1350 	return (0);
1351 }
1352 
1353 /*
1354  * Upgrade properties that have link IDs as values to link names.  Because '.'
1355  * is a valid linkname character, the port separater for link aggregations
1356  * must be changed to ':'.
1357  */
1358 static void
1359 linkattr_upgrade(dlmgmt_linkattr_t *attrp)
1360 {
1361 	datalink_id_t	linkid;
1362 	char		*portidstr;
1363 	char		portname[MAXLINKNAMELEN + 1];
1364 	dlmgmt_link_t	*linkp;
1365 	char		*new_attr_val;
1366 	size_t		new_attr_sz;
1367 	boolean_t	upgraded = B_FALSE;
1368 
1369 	if (strcmp(attrp->lp_name, "linkover") == 0 ||
1370 	    strcmp(attrp->lp_name, "simnetpeer") == 0) {
1371 		if (attrp->lp_type == DLADM_TYPE_UINT64) {
1372 			linkid = (datalink_id_t)*(uint64_t *)attrp->lp_val;
1373 			if ((linkp = link_by_id(linkid, GLOBAL_ZONEID)) == NULL)
1374 				return;
1375 			new_attr_sz = strlen(linkp->ll_link) + 1;
1376 			if ((new_attr_val = malloc(new_attr_sz)) == NULL)
1377 				return;
1378 			(void) strcpy(new_attr_val, linkp->ll_link);
1379 			upgraded = B_TRUE;
1380 		}
1381 	} else if (strcmp(attrp->lp_name, "portnames") == 0) {
1382 		/*
1383 		 * The old format for "portnames" was
1384 		 * "<linkid>.[<linkid>.]...".  The new format is
1385 		 * "<linkname>:[<linkname>:]...".
1386 		 */
1387 		if (!isdigit(((char *)attrp->lp_val)[0]))
1388 			return;
1389 		new_attr_val = calloc(MAXLINKATTRVALLEN, sizeof (char));
1390 		if (new_attr_val == NULL)
1391 			return;
1392 		portidstr = (char *)attrp->lp_val;
1393 		while (*portidstr != '\0') {
1394 			errno = 0;
1395 			linkid = strtol(portidstr, &portidstr, 10);
1396 			if (linkid == 0 || *portidstr != '.' ||
1397 			    (linkp = link_by_id(linkid, GLOBAL_ZONEID)) ==
1398 			    NULL) {
1399 				free(new_attr_val);
1400 				return;
1401 			}
1402 			(void) snprintf(portname, sizeof (portname), "%s:",
1403 			    linkp->ll_link);
1404 			if (strlcat(new_attr_val, portname,
1405 			    MAXLINKATTRVALLEN) >= MAXLINKATTRVALLEN) {
1406 				free(new_attr_val);
1407 				return;
1408 			}
1409 			/* skip the '.' delimiter */
1410 			portidstr++;
1411 		}
1412 		new_attr_sz = strlen(new_attr_val) + 1;
1413 		upgraded = B_TRUE;
1414 	}
1415 
1416 	if (upgraded) {
1417 		attrp->lp_type = DLADM_TYPE_STR;
1418 		attrp->lp_sz = new_attr_sz;
1419 		free(attrp->lp_val);
1420 		attrp->lp_val = new_attr_val;
1421 	}
1422 }
1423 
1424 static void
1425 dlmgmt_db_upgrade(dlmgmt_link_t *linkp)
1426 {
1427 	dlmgmt_linkattr_t *attrp;
1428 
1429 	for (attrp = linkp->ll_head; attrp != NULL; attrp = attrp->lp_next)
1430 		linkattr_upgrade(attrp);
1431 }
1432 
1433 static void
1434 dlmgmt_db_phys_activate(dlmgmt_link_t *linkp)
1435 {
1436 	linkp->ll_flags |= DLMGMT_ACTIVE;
1437 	(void) dlmgmt_write_db_entry(linkp->ll_link, linkp, DLMGMT_ACTIVE);
1438 }
1439 
1440 static void
1441 dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func)
1442 {
1443 	dlmgmt_link_t *linkp;
1444 
1445 	for (linkp = avl_first(&dlmgmt_id_avl); linkp != NULL;
1446 	    linkp = AVL_NEXT(&dlmgmt_id_avl, linkp)) {
1447 		if (linkp->ll_zoneid == zoneid && (linkp->ll_class & class))
1448 			func(linkp);
1449 	}
1450 }
1451 
1452 /*
1453  * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
1454  *
1455  * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
1456  * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
1457  * Because of the upcall architecture of dlmgmtd this can lead to deadlock
1458  * with the following scenario:
1459  *    a) the thread preparing to fork will have acquired the malloc locks
1460  *       then attempt to suspend every thread in preparation to fork.
1461  *    b) all of the upcalls will be blocked in door_ucred() trying to malloc()
1462  *       and get the credentials of their caller.
1463  *    c) we can't suspend the in-kernel thread making the upcall.
1464  *
1465  * Thus, we cannot serve door requests because we're blocked in malloc()
1466  * which fork() owns, but fork() is in turn blocked on the in-kernel thread
1467  * making the door upcall.  This is a fundamental architectural problem with
1468  * any server handling upcalls and also trying to fork().
1469  *
1470  * To minimize the chance of this deadlock occuring, we check ahead of time to
1471  * see if the file we want to read actually exists in the zone (which it almost
1472  * never does), so we don't need fork in that case (i.e. rarely to never).
1473  */
1474 static boolean_t
1475 zone_file_exists(char *zoneroot, char *filename)
1476 {
1477 	struct stat	sb;
1478 	char		fname[MAXPATHLEN];
1479 
1480 	(void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
1481 
1482 	if (stat(fname, &sb) == -1)
1483 		return (B_FALSE);
1484 
1485 	return (B_TRUE);
1486 }
1487 
1488 /*
1489  * Initialize the datalink <link name, linkid> mapping and the link's
1490  * attributes list based on the configuration file /etc/dladm/datalink.conf
1491  * and the active configuration cache file
1492  * /etc/svc/volatile/dladm/datalink-management:default.cache.
1493  */
1494 int
1495 dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
1496 {
1497 	dlmgmt_db_req_t	*req;
1498 	int		err;
1499 	boolean_t	boot = B_FALSE;
1500 
1501 	if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
1502 	    DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
1503 		return (err);
1504 
1505 	if (zone_file_exists(zoneroot, cachefile)) {
1506 		if ((err = dlmgmt_process_db_req(req)) != 0) {
1507 			/*
1508 			 * If we get back ENOENT, that means that the active
1509 			 * configuration file doesn't exist yet, and is not an
1510 			 * error.  We'll create it down below after we've
1511 			 * loaded the persistent configuration.
1512 			 */
1513 			if (err != ENOENT)
1514 				goto done;
1515 			boot = B_TRUE;
1516 		}
1517 	} else {
1518 		boot = B_TRUE;
1519 	}
1520 
1521 	if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
1522 		req->ls_flags = DLMGMT_PERSIST;
1523 		err = dlmgmt_process_db_req(req);
1524 		if (err != 0 && err != ENOENT)
1525 			goto done;
1526 	}
1527 	err = 0;
1528 	if (rewrite_needed) {
1529 		/*
1530 		 * First update links in memory, then dump the entire db to
1531 		 * disk.
1532 		 */
1533 		dlmgmt_db_walk(zoneid, DATALINK_CLASS_ALL, dlmgmt_db_upgrade);
1534 		req->ls_op = DLMGMT_DB_OP_WRITE;
1535 		req->ls_linkid = DATALINK_ALL_LINKID;
1536 		if ((err = dlmgmt_process_db_req(req)) != 0 &&
1537 		    err != EINPROGRESS)
1538 			goto done;
1539 	}
1540 	if (boot) {
1541 		dlmgmt_db_walk(zoneid, DATALINK_CLASS_PHYS,
1542 		    dlmgmt_db_phys_activate);
1543 	}
1544 
1545 done:
1546 	if (err == EINPROGRESS)
1547 		err = 0;
1548 	else
1549 		free(req);
1550 	return (err);
1551 }
1552 
1553 /*
1554  * Remove all links in the given zoneid.
1555  */
1556 void
1557 dlmgmt_db_fini(zoneid_t zoneid)
1558 {
1559 	dlmgmt_link_t *linkp = avl_first(&dlmgmt_name_avl), *next_linkp;
1560 
1561 	while (linkp != NULL) {
1562 		next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
1563 		if (linkp->ll_zoneid == zoneid) {
1564 			(void) dlmgmt_destroy_common(linkp,
1565 			    DLMGMT_ACTIVE | DLMGMT_PERSIST);
1566 		}
1567 		linkp = next_linkp;
1568 	}
1569 }
1570