xref: /titanic_51/usr/src/cmd/svc/configd/backend.c (revision 3ceb94da9cef3811ea656a9277c2c3f23a6e7f1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
31  * be able to statvfs(2) possibly large systems.  This define gives us
32  * access to the transitional interfaces.  See lfcompile64(5) for how
33  * _LARGEFILE64_SOURCE works.
34  */
35 #define	_LARGEFILE64_SOURCE
36 
37 #include <assert.h>
38 #include <door.h>
39 #include <dirent.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <limits.h>
43 #include <pthread.h>
44 #include <stdarg.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sys/stat.h>
49 #include <sys/statvfs.h>
50 #include <unistd.h>
51 #include <zone.h>
52 #include <libscf_priv.h>
53 
54 #include "configd.h"
55 #include "repcache_protocol.h"
56 
57 #include <sqlite.h>
58 #include <sqlite-misc.h>
59 
60 /*
61  * This file has two purposes:
62  *
63  * 1. It contains the database schema, and the code for setting up our backend
64  *    databases, including installing said schema.
65  *
66  * 2. It provides a simplified interface to the SQL database library, and
67  *    synchronizes MT access to the database.
68  */
69 
/*
 * Accumulator for one category of backend activity.  Each sample recorded
 * through UPDATE_TOTALS_WR() bumps the count and adds the elapsed
 * gethrtime()/gethrvtime() deltas.
 */
typedef struct backend_spent {
	uint64_t bs_count;	/* number of samples recorded */
	hrtime_t bs_time;	/* sum of gethrtime() deltas */
	hrtime_t bs_vtime;	/* sum of gethrvtime() deltas */
} backend_spent_t;
75 
/*
 * Pair of accumulators kept per access mode (see be_totals[] in
 * sqlite_backend_t, which holds one of these for reading and one for
 * writing).
 */
typedef struct backend_totals {
	backend_spent_t	bt_lock;	/* waiting for lock */
	backend_spent_t	bt_exec;	/* time spent executing SQL */
} backend_totals_t;
80 
/*
 * Per-database state.  be_lock serializes access to be_db; be_thread
 * records which thread currently holds be_lock so that backend_panic() can
 * tell whether the calling thread owns it.
 */
typedef struct sqlite_backend {
	pthread_mutex_t	be_lock;
	pthread_t	be_thread;	/* thread holding lock */
	struct sqlite	*be_db;
	const char	*be_path;	/* path to db */
	int		be_readonly;	/* readonly at start, and still is */
	int		be_writing;	/* held for writing */
	backend_type_t	be_type;	/* type of db */
	hrtime_t	be_lastcheck;	/* time of last read-only check */
	backend_totals_t be_totals[2];	/* one for reading, one for writing */
} sqlite_backend_t;
92 
/*
 * State of one backend transaction.
 */
struct backend_tx {
	sqlite_backend_t	*bt_be;		/* presumably the backend the tx runs on */
	int			bt_readonly;	/* presumably nonzero for a read-only tx */
	int			bt_type;	/* backend type; see backend_is_upgraded() */
	int			bt_full;	/* SQLITE_FULL during tx */
};
99 
/*
 * Record one sample in the accounting bucket of backend `sb'.
 *
 *	writing	selects be_totals[1] when nonzero, be_totals[0] otherwise
 *	field	which backend_spent_t to update (bt_lock or bt_exec)
 *	ts	gethrtime() value captured when the measured interval began
 *	vts	gethrvtime() value captured when the measured interval began
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe inside unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) do {		\
	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field;	\
	__bsp->bs_count++;						\
	__bsp->bs_time += (gethrtime() - (ts));				\
	__bsp->bs_vtime += (gethrvtime() - (vts));			\
	/*CONSTCOND*/							\
} while (0)

/*
 * As UPDATE_TOTALS_WR(), with the bucket chosen by the backend's own
 * be_writing flag.
 */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
109 
/*
 * A query string under construction.
 */
struct backend_query {
	char	*bq_buf;	/* presumably the query text buffer -- confirm */
	size_t	bq_size;	/* presumably the size of bq_buf -- confirm */
};
114 
/*
 * One table of the schema.  Arrays of these are terminated by an entry
 * whose members are both NULL.
 */
struct backend_tbl_info {
	const char *bti_name;	/* table name */
	const char *bti_cols;	/* comma-separated column definitions */
};
119 
/*
 * One index of the schema.  Arrays of these are terminated by an entry
 * whose members are all NULL.
 */
struct backend_idx_info {
	const char *bxi_tbl;	/* table being indexed */
	const char *bxi_idx;	/* short index name */
	const char *bxi_cols;	/* comma-separated columns to index */
};
125 
/*
 * Panic coordination.  backend_panic_thread is set (under
 * backend_panic_lock) by the first thread to enter backend_panic(), and is
 * never cleared in the code visible here; a nonzero value means a panic is
 * in progress.  Threads that arrive later block on backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */
133 
134 /* interval between read-only checks while starting up */
135 #define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)
136 
137 /*
138  * Any incompatible change to the below schema should bump the version number.
139  * The schema has been changed to support value ordering,  but this change
140  * is backwards-compatible - i.e. a previous svc.configd can use a
141  * repository database with the new schema perfectly well.  As a result,
142  * the schema version has not been updated,  allowing downgrade of systems
143  * without losing repository data.
144  */
145 #define	BACKEND_SCHEMA_VERSION		5
146 
/*
 * Tables that exist only in the persistent (BACKEND_TYPE_NORMAL) repository.
 * Each entry is { table name, column definitions }; the list ends with a
 * { NULL, NULL } sentinel.
 */
static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
	/*
	 * service_tbl holds all services.  svc_id is the identifier of the
	 * service.
	 */
	{
		"service_tbl",
		"svc_id          INTEGER PRIMARY KEY,"
		"svc_name        CHAR(256) NOT NULL"
	},

	/*
	 * instance_tbl holds all of the instances.  The parent service id
	 * is instance_svc.
	 */
	{
		"instance_tbl",
		"instance_id     INTEGER PRIMARY KEY,"
		"instance_name   CHAR(256) NOT NULL,"
		"instance_svc    INTEGER NOT NULL"
	},

	/*
	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
	 */
	{
		"snapshot_lnk_tbl",
		"lnk_id          INTEGER PRIMARY KEY,"
		"lnk_inst_id     INTEGER NOT NULL,"
		"lnk_snap_name   CHAR(256) NOT NULL,"
		"lnk_snap_id     INTEGER NOT NULL"
	},

	/*
	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
	 * snaplevels.
	 */
	{
		"snaplevel_tbl",
		"snap_id                 INTEGER NOT NULL,"
		"snap_level_num          INTEGER NOT NULL,"
		"snap_level_id           INTEGER NOT NULL,"
		"snap_level_service_id   INTEGER NOT NULL,"
		"snap_level_service      CHAR(256) NOT NULL,"
		"snap_level_instance_id  INTEGER NULL,"
		"snap_level_instance     CHAR(256) NULL"
	},

	/*
	 * snaplevel_lnk_tbl links snaplevels to property groups.
	 * snaplvl_pg_* is identical to the original property group,
	 * and snaplvl_gen_id overrides the generation number.
	 * The service/instance ids are as in the snaplevel.
	 */
	{
		"snaplevel_lnk_tbl",
		"snaplvl_level_id INTEGER NOT NULL,"
		"snaplvl_pg_id    INTEGER NOT NULL,"
		"snaplvl_pg_name  CHAR(256) NOT NULL,"
		"snaplvl_pg_type  CHAR(256) NOT NULL,"
		"snaplvl_pg_flags INTEGER NOT NULL,"
		"snaplvl_gen_id   INTEGER NOT NULL"
	},

	{ NULL, NULL }
};
213 
/*
 * Indexes for the BACKEND_TYPE_NORMAL-only tables.  Each entry is
 * { table, short index name, indexed columns }; the list ends with an
 * all-NULL sentinel.
 */
static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
	{ "service_tbl",	"name",	"svc_name" },
	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
	{ "snaplevel_tbl",	"id",	"snap_id" },
	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
	{ NULL, NULL, NULL }
};
224 
/*
 * The non-persistent repository has no tables or indexes of its own beyond
 * the common ones, so both lists contain only the terminating sentinel.
 */
static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL }
};

static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL, NULL }
};
232 
/*
 * Tables present in every backend type.  Each entry is
 * { table name, column definitions }; the list ends with a { NULL, NULL }
 * sentinel.
 */
static struct backend_tbl_info tbls_common[] = { /* all backend types */
	/*
	 * pg_tbl defines property groups.  They are associated with a single
	 * service or instance.  The pg_gen_id links them with the latest
	 * "edited" version of its properties.
	 */
	{
		"pg_tbl",
		"pg_id           INTEGER PRIMARY KEY,"
		"pg_parent_id    INTEGER NOT NULL,"
		"pg_name         CHAR(256) NOT NULL,"
		"pg_type         CHAR(256) NOT NULL,"
		"pg_flags        INTEGER NOT NULL,"
		"pg_gen_id       INTEGER NOT NULL"
	},

	/*
	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
	 * (prop_name, prop_type, val_id) trios.
	 */
	{
		"prop_lnk_tbl",
		"lnk_prop_id     INTEGER PRIMARY KEY,"
		"lnk_pg_id       INTEGER NOT NULL,"
		"lnk_gen_id      INTEGER NOT NULL,"
		"lnk_prop_name   CHAR(256) NOT NULL,"
		"lnk_prop_type   CHAR(2) NOT NULL,"
		"lnk_val_id      INTEGER"
	},

	/*
	 * value_tbl maps a value_id to a set of values.  For any given
	 * value_id, value_type is constant.  The table definition here
	 * is repeated in backend_check_upgrade(), and must be kept in sync.
	 */
	{
		"value_tbl",
		"value_id        INTEGER NOT NULL,"
		"value_type      CHAR(1) NOT NULL,"
		"value_value     VARCHAR NOT NULL,"
		"value_order     INTEGER DEFAULT 0"
	},

	/*
	 * id_tbl has one row per id space
	 */
	{
		"id_tbl",
		"id_name         STRING NOT NULL,"
		"id_next         INTEGER NOT NULL"
	},

	/*
	 * schema_version has a single row, which contains
	 * BACKEND_SCHEMA_VERSION at the time of creation.
	 */
	{
		"schema_version",
		"schema_version  INTEGER"
	},
	{ NULL, NULL }
};
295 
/*
 * Indexes for the tables common to all backend types.  Each entry is
 * { table, short index name, indexed columns }; the list ends with an
 * all-NULL sentinel.
 *
 * The indexing of value_tbl is repeated in backend_check_upgrade() and
 * must be kept in sync with the indexing specification here.  (There the
 * index is created as "value_tbl_id", i.e. presumably <table>_<index>.)
 */
static struct backend_idx_info idxs_common[] = { /* all backend types */
	{ "pg_tbl",		"parent", "pg_parent_id" },
	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
	{ "value_tbl",		"id",	"value_id" },
	{ "id_tbl",		"id",	"id_name" },
	{ NULL, NULL, NULL }
};
310 
/*
 * Argument block for run_single_int_callback().
 */
struct run_single_int_info {
	uint32_t	*rs_out;	/* where the parsed integer is stored */
	int		rs_result;	/* set to REP_PROTOCOL_SUCCESS on a hit */
};
315 
316 /*ARGSUSED*/
317 static int
318 run_single_int_callback(void *arg, int columns, char **vals, char **names)
319 {
320 	struct run_single_int_info *info = arg;
321 	uint32_t val;
322 
323 	char *endptr = vals[0];
324 
325 	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
326 	assert(columns == 1);
327 
328 	if (vals[0] == NULL)
329 		return (BACKEND_CALLBACK_CONTINUE);
330 
331 	errno = 0;
332 	val = strtoul(vals[0], &endptr, 10);
333 	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
334 		backend_panic("malformed integer \"%20s\"", vals[0]);
335 
336 	*info->rs_out = val;
337 	info->rs_result = REP_PROTOCOL_SUCCESS;
338 	return (BACKEND_CALLBACK_CONTINUE);
339 }
340 
341 /*ARGSUSED*/
342 int
343 backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
344 {
345 	return (BACKEND_CALLBACK_ABORT);
346 }
347 
348 /*
349  * check to see if we can successfully start a transaction;  if not, the
350  * filesystem is mounted read-only.
351  */
352 static int
353 backend_is_readonly(struct sqlite *db, const char *path)
354 {
355 	int r;
356 	statvfs64_t stat;
357 
358 	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
359 		return (SQLITE_READONLY);
360 
361 	r = sqlite_exec(db,
362 	    "BEGIN TRANSACTION; "
363 	    "UPDATE schema_version SET schema_version = schema_version; ",
364 	    NULL, NULL, NULL);
365 	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
366 	return (r);
367 }
368 
369 static void
370 backend_trace_sql(void *arg, const char *sql)
371 {
372 	sqlite_backend_t *be = arg;
373 
374 	if (backend_print_trace) {
375 		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
376 	}
377 }
378 
/*
 * bes[t] is the backend of type t, or NULL if that backend does not exist
 * (see backend_lock()).  be_info[] presumably provides the storage the
 * bes[] entries point at -- confirm against the initialization code.
 */
static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];

/*
 * For a native build, repositories are created from scratch, so upgrade
 * is not an issue.  This variable is implicitly protected by
 * bes[BACKEND_TYPE_NORMAL]->be_lock.
 */
#ifdef NATIVE_BUILD
static boolean_t be_normal_upgraded = B_TRUE;
#else
static boolean_t be_normal_upgraded = B_FALSE;
#endif	/* NATIVE_BUILD */
392 
393 /*
394  * Has backend been upgraded? In nonpersistent case, answer is always
395  * yes.
396  */
397 boolean_t
398 backend_is_upgraded(backend_tx_t *bt)
399 {
400 	if (bt->bt_type == BACKEND_TYPE_NONPERSIST)
401 		return (B_TRUE);
402 	return (be_normal_upgraded);
403 }
404 
405 #define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
406 /*
407  * backend_panic() -- some kind of database problem or corruption has been hit.
408  * We attempt to quiesce the other database users -- all of the backend sql
409  * entry points will call backend_panic(NULL) if a panic is in progress, as
410  * will any attempt to start a transaction.
411  *
412  * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
413  * either drop the lock or call backend_panic().  If they don't respond in
414  * time, we'll just exit anyway.
415  */
416 void
417 backend_panic(const char *format, ...)
418 {
419 	int i;
420 	va_list args;
421 	int failed = 0;
422 
423 	(void) pthread_mutex_lock(&backend_panic_lock);
424 	if (backend_panic_thread != 0) {
425 		(void) pthread_mutex_unlock(&backend_panic_lock);
426 		/*
427 		 * first, drop any backend locks we're holding, then
428 		 * sleep forever on the panic_cv.
429 		 */
430 		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
431 			if (bes[i] != NULL &&
432 			    bes[i]->be_thread == pthread_self())
433 				(void) pthread_mutex_unlock(&bes[i]->be_lock);
434 		}
435 		(void) pthread_mutex_lock(&backend_panic_lock);
436 		for (;;)
437 			(void) pthread_cond_wait(&backend_panic_cv,
438 			    &backend_panic_lock);
439 	}
440 	backend_panic_thread = pthread_self();
441 	(void) pthread_mutex_unlock(&backend_panic_lock);
442 
443 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
444 		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
445 			(void) pthread_mutex_unlock(&bes[i]->be_lock);
446 	}
447 
448 	va_start(args, format);
449 	configd_vcritical(format, args);
450 	va_end(args);
451 
452 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
453 		timespec_t rel;
454 
455 		rel.tv_sec = 0;
456 		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
457 
458 		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
459 			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
460 			    &rel) != 0)
461 				failed++;
462 		}
463 	}
464 	if (failed) {
465 		configd_critical("unable to quiesce database\n");
466 	}
467 
468 	if (backend_panic_abort)
469 		abort();
470 
471 	exit(CONFIGD_EXIT_DATABASE_BAD);
472 }
473 
474 /*
475  * Returns
476  *   _SUCCESS
477  *   _DONE - callback aborted query
478  *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
479  */
480 static int
481 backend_error(sqlite_backend_t *be, int error, char *errmsg)
482 {
483 	if (error == SQLITE_OK)
484 		return (REP_PROTOCOL_SUCCESS);
485 
486 	switch (error) {
487 	case SQLITE_ABORT:
488 		free(errmsg);
489 		return (REP_PROTOCOL_DONE);
490 
491 	case SQLITE_NOMEM:
492 	case SQLITE_FULL:
493 	case SQLITE_TOOBIG:
494 		free(errmsg);
495 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
496 
497 	default:
498 		backend_panic("%s: db error: %s", be->be_path, errmsg);
499 		/*NOTREACHED*/
500 	}
501 }
502 
/*
 * Free a list produced by backend_backup_get_prev(): the first out_sz
 * strings, then the array itself.  A non-positive out_sz frees only the
 * array, and a NULL array is accepted.
 */
static void
backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
{
	char **list = (char **)out_arg;
	ssize_t i;

	for (i = 0; i < out_sz; i++)
		free(list[i]);

	free(list);
}
512 
/*
 * Builds an inverse-time-sorted (newest first) array of backup file names.
 * The path is a single buffer, and the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * dirname will either be pathname, or ".".
 *
 * The buffer is modified temporarily (NUL bytes are written at
 * pathname[pathlen] and at the start of the basename) and restored before
 * returning.
 *
 * On success *out_arg is a malloc()ed array of strdup()ed directory entry
 * names (no directory component), to be released with
 * backend_backup_cleanup(); it is NULL if nothing matched.
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error (in which case *out_arg is set to NULL).
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char b_start, b_end;
	DIR *dir;
	char **out = NULL;
	char *name, *p;
	char *dirname, *basename;
	char *pathend;
	struct dirent *ent;

	size_t count = 0;
	size_t baselen;

	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;

	size_t idx;

	/* cut the path at pathlen, remembering the byte we overwrite */
	pathend = pathname + pathlen;
	b_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');

	if (basename != NULL) {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	} else {
		basename = pathname;
		dirname = ".";
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	b_start = basename[0];

	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = b_start;		/* restore path */

	if (dir == NULL)
		goto fail;


	while ((ent = readdir(dir)) != NULL) {
		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0)
			continue;

		name = ent->d_name;
		if (name[baselen] != '-')
			continue;

		p = name + baselen + 1;

		/* every position is a digit except baroffset, which is '_' */
		for (idx = 0; idx < ndigits; idx++) {
			char c = p[idx];
			if (idx == baroffset && c != '_')
				break;
			if (idx != baroffset && (c < '0' || c > '9'))
				break;
		}
		if (idx != ndigits || p[idx] != '\0')
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		name = strdup(name);
		if (name == NULL)
			goto fail_closedir;
		p = strrchr(name, '-');

		/*
		 * Walk the list comparing timestamp suffixes (then whole
		 * names).  Whenever the carried entry sorts after the one
		 * in the slot, swap them and keep carrying the displaced
		 * entry toward the tail.
		 */
		for (idx = 0; idx < count; idx++) {
			char *tmp = out[idx];
			char *tp = strrchr(tmp, '-');

			int cmp = strcmp(p, tp);
			if (cmp == 0)
				cmp = strcmp(name, tmp);

			if (cmp == 0) {
				/* exact duplicate; drop it */
				free(name);
				name = NULL;
				break;
			} else if (cmp > 0) {
				out[idx] = name;
				name = tmp;
				p = tp;
			}
		}

		if (idx == count) {
			/* the carried entry becomes the new last element */
			char **new_out = realloc(out,
			    (count + 1) * sizeof (*out));

			if (new_out == NULL) {
				free(name);
				goto fail_closedir;
			}

			out = new_out;
			out[count++] = name;
		} else {
			assert(name == NULL);
		}
	}
	(void) closedir(dir);

	/* basename + baselen is pathend; put back the byte we cut */
	basename[baselen] = b_end;

	*out_arg = (const char **)out;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	basename[0] = b_start;
	*pathend = b_end;

	backend_backup_cleanup((const char **)out, count);

	*out_arg = NULL;
	return (-1);
}
663 
664 /*
665  * Copies the repository path into out, a buffer of out_len bytes,
666  * removes the ".db" (or whatever) extension, and, if name is non-NULL,
667  * appends "-name" to it.  If name is non-NULL, it can fail with:
668  *
669  *	_TRUNCATED	will not fit in buffer.
670  *	_BAD_REQUEST	name is not a valid identifier
671  */
672 static rep_protocol_responseid_t
673 backend_backup_base(sqlite_backend_t *be, const char *name,
674     char *out, size_t out_len)
675 {
676 	char *p, *q;
677 	size_t len;
678 
679 	/*
680 	 * for paths of the form /path/to/foo.db, we truncate at the final
681 	 * '.'.
682 	 */
683 	(void) strlcpy(out, be->be_path, out_len);
684 
685 	p = strrchr(out, '/');
686 	q = strrchr(out, '.');
687 
688 	if (p != NULL && q != NULL && q > p)
689 		*q = 0;
690 
691 	if (name != NULL) {
692 		len = strlen(out);
693 		assert(len < out_len);
694 
695 		out += len;
696 		out_len -= len;
697 
698 		len = strlen(name);
699 
700 		/*
701 		 * verify that the name tag is entirely alphabetic,
702 		 * non-empty, and not too long.
703 		 */
704 		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
705 		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
706 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
707 
708 		if (snprintf(out, out_len, "-%s", name) >= out_len)
709 			return (REP_PROTOCOL_FAIL_TRUNCATED);
710 	}
711 
712 	return (REP_PROTOCOL_SUCCESS);
713 }
714 
/*
 * See if a backup is needed.  We do a backup unless both files are
 * byte-for-byte identical.
 *
 * Returns 0 if rep_name and backup_name are distinct files with identical
 * contents, and 1 in every other case: either file cannot be opened or
 * examined, both names refer to the same file, the sizes or contents
 * differ, or a read error occurs.
 */
static int
backend_check_backup_needed(const char *rep_name, const char *backup_name)
{
	int repfd = open(rep_name, O_RDONLY);
	int fd = open(backup_name, O_RDONLY);
	struct stat s_rep, s_backup;
	FILE *f_rep = NULL;
	FILE *f_backup = NULL;
	int needed = 1;
	int c1, c2;

	if (repfd < 0 || fd < 0)
		goto out;

	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
		goto out;

	/*
	 * if they are the same file, we need to do a backup to break the
	 * hard link or symlink involved.
	 */
	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
		goto out;

	if (s_rep.st_size != s_backup.st_size)
		goto out;

	/*
	 * Once fdopen() succeeds the stream owns the descriptor and
	 * fclose() will close it.  Forget our copy so that it is not
	 * closed a second time below; in a multithreaded process the
	 * number could already have been reused by another thread.
	 */
	if ((f_rep = fdopen(repfd, "r")) == NULL)
		goto out;
	repfd = -1;

	if ((f_backup = fdopen(fd, "r")) == NULL)
		goto out;
	fd = -1;

	do {
		c1 = getc(f_rep);
		c2 = getc(f_backup);
		if (c1 != c2)
			goto out;
	} while (c1 != EOF);

	/* both hit EOF together; identical only if neither EOF was an error */
	if (!ferror(f_rep) && !ferror(f_backup))
		needed = 0;

out:
	if (f_rep != NULL)
		(void) fclose(f_rep);
	if (f_backup != NULL)
		(void) fclose(f_backup);
	if (repfd >= 0)
		(void) close(repfd);
	if (fd >= 0)
		(void) close(fd);
	return (needed);
}
776 
777 /*
778  * This interface is called to perform the actual copy
779  *
780  * Return:
781  *	_FAIL_UNKNOWN		read/write fails
782  *	_FAIL_NO_RESOURCES	out of memory
783  *	_SUCCESS		copy succeeds
784  */
785 static rep_protocol_responseid_t
786 backend_do_copy(const char *src, int srcfd, const char *dst,
787     int dstfd, size_t *sz)
788 {
789 	char *buf;
790 	off_t nrd, nwr, n, r_off = 0, w_off = 0;
791 
792 	if ((buf = malloc(8192)) == NULL)
793 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
794 
795 	while ((nrd = read(srcfd, buf, 8192)) != 0) {
796 		if (nrd < 0) {
797 			if (errno == EINTR)
798 				continue;
799 
800 			configd_critical(
801 			    "Backend copy failed: fails to read from %s "
802 			    "at offset %d: %s\n", src, r_off, strerror(errno));
803 			free(buf);
804 			return (REP_PROTOCOL_FAIL_UNKNOWN);
805 		}
806 
807 		r_off += nrd;
808 
809 		nwr = 0;
810 		do {
811 			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
812 				if (errno == EINTR)
813 					continue;
814 
815 				configd_critical(
816 				    "Backend copy failed: fails to write to %s "
817 				    "at offset %d: %s\n", dst, w_off,
818 				    strerror(errno));
819 				free(buf);
820 				return (REP_PROTOCOL_FAIL_UNKNOWN);
821 			}
822 
823 			nwr += n;
824 			w_off += n;
825 
826 		} while (nwr < nrd);
827 	}
828 
829 	if (sz)
830 		*sz = w_off;
831 
832 	free(buf);
833 	return (REP_PROTOCOL_SUCCESS);
834 }
835 
836 /*
837  * Can return:
838  *	_BAD_REQUEST		name is not valid
839  *	_TRUNCATED		name is too long for current repository path
840  *	_UNKNOWN		failed for unknown reason (details written to
841  *				console)
842  *	_BACKEND_READONLY	backend is not writable
843  *	_NO_RESOURCES		out of memory
844  *	_SUCCESS		Backup completed successfully.
845  */
846 static rep_protocol_responseid_t
847 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
848 {
849 	const char **old_list;
850 	ssize_t old_sz;
851 	ssize_t old_max = max_repository_backups;
852 	ssize_t cur;
853 	char *finalname;
854 	char *finalpath;
855 	char *tmppath;
856 	int infd, outfd;
857 	size_t len;
858 	time_t now;
859 	struct tm now_tm;
860 	rep_protocol_responseid_t result;
861 
862 	if ((finalpath = malloc(PATH_MAX)) == NULL)
863 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
864 
865 	if ((tmppath = malloc(PATH_MAX)) == NULL) {
866 		free(finalpath);
867 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
868 	}
869 
870 	if (be->be_readonly) {
871 		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
872 		goto out;
873 	}
874 
875 	result = backend_backup_base(be, name, finalpath, PATH_MAX);
876 	if (result != REP_PROTOCOL_SUCCESS)
877 		goto out;
878 
879 	if (!backend_check_backup_needed(be->be_path, finalpath)) {
880 		result = REP_PROTOCOL_SUCCESS;
881 		goto out;
882 	}
883 
884 	/*
885 	 * remember the original length, and the basename location
886 	 */
887 	len = strlen(finalpath);
888 	finalname = strrchr(finalpath, '/');
889 	if (finalname != NULL)
890 		finalname++;
891 	else
892 		finalname = finalpath;
893 
894 	(void) strlcpy(tmppath, finalpath, PATH_MAX);
895 	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
896 		result = REP_PROTOCOL_FAIL_TRUNCATED;
897 		goto out;
898 	}
899 
900 	now = time(NULL);
901 	if (localtime_r(&now, &now_tm) == NULL) {
902 		configd_critical(
903 		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
904 		    be->be_path, strerror(errno));
905 		result = REP_PROTOCOL_FAIL_UNKNOWN;
906 		goto out;
907 	}
908 
909 	if (strftime(finalpath + len, PATH_MAX - len,
910 	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
911 		result = REP_PROTOCOL_FAIL_TRUNCATED;
912 		goto out;
913 	}
914 
915 	infd = open(be->be_path, O_RDONLY);
916 	if (infd < 0) {
917 		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
918 		    be->be_path, strerror(errno));
919 		result = REP_PROTOCOL_FAIL_UNKNOWN;
920 		goto out;
921 	}
922 
923 	outfd = mkstemp(tmppath);
924 	if (outfd < 0) {
925 		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
926 		    name, tmppath, strerror(errno));
927 		(void) close(infd);
928 		result = REP_PROTOCOL_FAIL_UNKNOWN;
929 		goto out;
930 	}
931 
932 	if ((result = backend_do_copy((const char *)be->be_path, infd,
933 	    (const char *)tmppath, outfd, NULL)) != REP_PROTOCOL_SUCCESS)
934 		goto fail;
935 
936 	/*
937 	 * grab the old list before doing our re-name.
938 	 */
939 	if (old_max > 0)
940 		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
941 
942 	if (rename(tmppath, finalpath) < 0) {
943 		configd_critical(
944 		    "\"%s\" backup failed: rename(%s, %s): %s\n",
945 		    name, tmppath, finalpath, strerror(errno));
946 		result = REP_PROTOCOL_FAIL_UNKNOWN;
947 		goto fail;
948 	}
949 
950 	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
951 
952 	(void) unlink(tmppath);
953 	if (symlink(finalname, tmppath) < 0) {
954 		configd_critical(
955 		    "\"%s\" backup completed, but updating "
956 		    "\"%s\" symlink to \"%s\" failed: %s\n",
957 		    name, tmppath, finalname, strerror(errno));
958 	}
959 
960 	if (old_max > 0 && old_sz > 0) {
961 		/* unlink all but the first (old_max - 1) files */
962 		for (cur = old_max - 1; cur < old_sz; cur++) {
963 			(void) strlcpy(finalname, old_list[cur],
964 			    PATH_MAX - (finalname - finalpath));
965 			if (unlink(finalpath) < 0)
966 				configd_critical(
967 				    "\"%s\" backup completed, but removing old "
968 				    "file \"%s\" failed: %s\n",
969 				    name, finalpath, strerror(errno));
970 		}
971 
972 		backend_backup_cleanup(old_list, old_sz);
973 	}
974 
975 	result = REP_PROTOCOL_SUCCESS;
976 
977 fail:
978 	(void) close(infd);
979 	(void) close(outfd);
980 	if (result != REP_PROTOCOL_SUCCESS)
981 		(void) unlink(tmppath);
982 
983 out:
984 	free(finalpath);
985 	free(tmppath);
986 
987 	return (result);
988 }
989 
990 /*
991  * Check if value_tbl has been upgraded in the main database,  and
992  * if not (if the value_order column is not present),  and do_upgrade is true,
993  * upgrade value_tbl in repository to contain the additional value_order
994  * column. The version of sqlite used means ALTER TABLE is not
995  * available, so we cannot simply use "ALTER TABLE value_tbl ADD COLUMN".
996  * Rather we need to create a temporary table with the additional column,
997  * import the value_tbl, drop the original value_tbl, recreate the value_tbl
998  * with the additional column, import the values from value_tbl_tmp,
999  * reindex and finally drop value_tbl_tmp.  During boot, we wish to check
1000  * if the repository has been upgraded before it is writable,  so that
1001  * property value retrieval can use the appropriate form of the SELECT
1002  * statement that retrieves property values.  As a result, we need to check
1003  * if the repository has been upgraded prior to the point when we can
1004  * actually carry out the update.
1005  */
1006 void
1007 backend_check_upgrade(sqlite_backend_t *be, boolean_t do_upgrade)
1008 {
1009 	char *errp;
1010 	int r;
1011 
1012 	if (be_normal_upgraded)
1013 		return;
1014 	/*
1015 	 * Test if upgrade is needed. If value_order column does not exist,
1016 	 * we need to upgrade the schema.
1017 	 */
1018 	r = sqlite_exec(be->be_db, "SELECT value_order FROM value_tbl LIMIT 1;",
1019 	    NULL, NULL, NULL);
1020 	if (r == SQLITE_ERROR && do_upgrade) {
1021 		/* No value_order column - needs upgrade */
1022 		configd_info("Upgrading SMF repository format...");
1023 		r = sqlite_exec(be->be_db,
1024 		    "BEGIN TRANSACTION; "
1025 		    "CREATE TABLE value_tbl_tmp ( "
1026 		    "value_id   INTEGER NOT NULL, "
1027 		    "value_type CHAR(1) NOT NULL, "
1028 		    "value_value VARCHAR NOT NULL, "
1029 		    "value_order INTEGER DEFAULT 0); "
1030 		    "INSERT INTO value_tbl_tmp "
1031 		    "(value_id, value_type, value_value) "
1032 		    "SELECT value_id, value_type, value_value FROM value_tbl; "
1033 		    "DROP TABLE value_tbl; "
1034 		    "CREATE TABLE value_tbl( "
1035 		    "value_id   INTEGER NOT NULL, "
1036 		    "value_type CHAR(1) NOT NULL, "
1037 		    "value_value VARCHAR NOT NULL, "
1038 		    "value_order INTEGER DEFAULT 0); "
1039 		    "INSERT INTO value_tbl SELECT * FROM value_tbl_tmp; "
1040 		    "CREATE INDEX value_tbl_id ON value_tbl (value_id); "
1041 		    "DROP TABLE value_tbl_tmp; "
1042 		    "COMMIT TRANSACTION; "
1043 		    "VACUUM; ",
1044 		    NULL, NULL, &errp);
1045 		if (r == SQLITE_OK) {
1046 			configd_info("SMF repository upgrade is complete.");
1047 		} else {
1048 			backend_panic("%s: repository upgrade failed: %s",
1049 			    be->be_path, errp);
1050 			/* NOTREACHED */
1051 		}
1052 	}
1053 	if (r == SQLITE_OK)
1054 		be_normal_upgraded = B_TRUE;
1055 	else
1056 		be_normal_upgraded = B_FALSE;
1057 }
1058 
1059 static int
1060 backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
1061 {
1062 	char *errp;
1063 	struct sqlite *new;
1064 	int r;
1065 
1066 	assert(be->be_readonly);
1067 	assert(be == bes[BACKEND_TYPE_NORMAL]);
1068 
1069 	/*
1070 	 * If we don't *need* to be writable, only check every once in a
1071 	 * while.
1072 	 */
1073 	if (!writing) {
1074 		if ((uint64_t)(t - be->be_lastcheck) <
1075 		    BACKEND_READONLY_CHECK_INTERVAL)
1076 			return (REP_PROTOCOL_SUCCESS);
1077 		be->be_lastcheck = t;
1078 	}
1079 
1080 	new = sqlite_open(be->be_path, 0600, &errp);
1081 	if (new == NULL) {
1082 		backend_panic("reopening %s: %s\n", be->be_path, errp);
1083 		/*NOTREACHED*/
1084 	}
1085 	r = backend_is_readonly(new, be->be_path);
1086 
1087 	if (r != SQLITE_OK) {
1088 		sqlite_close(new);
1089 		if (writing)
1090 			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
1091 		return (REP_PROTOCOL_SUCCESS);
1092 	}
1093 
1094 	/*
1095 	 * We can write!  Swap the db handles, mark ourself writable,
1096 	 * upgrade if necessary,  and make a backup.
1097 	 */
1098 	sqlite_close(be->be_db);
1099 	be->be_db = new;
1100 	be->be_readonly = 0;
1101 
1102 	if (be->be_type == BACKEND_TYPE_NORMAL)
1103 		backend_check_upgrade(be, B_TRUE);
1104 
1105 	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
1106 	    REP_PROTOCOL_SUCCESS) {
1107 		configd_critical(
1108 		    "unable to create \"%s\" backup of \"%s\"\n",
1109 		    REPOSITORY_BOOT_BACKUP, be->be_path);
1110 	}
1111 
1112 	return (REP_PROTOCOL_SUCCESS);
1113 }
1114 
1115 /*
1116  * If t is not BACKEND_TYPE_NORMAL, can fail with
1117  *   _BACKEND_ACCESS - backend does not exist
1118  *
1119  * If writing is nonzero, can also fail with
1120  *   _BACKEND_READONLY - backend is read-only
1121  */
1122 static int
1123 backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
1124 {
1125 	sqlite_backend_t *be = NULL;
1126 	hrtime_t ts, vts;
1127 
1128 	*bep = NULL;
1129 
1130 	assert(t == BACKEND_TYPE_NORMAL ||
1131 	    t == BACKEND_TYPE_NONPERSIST);
1132 
1133 	be = bes[t];
1134 	if (t == BACKEND_TYPE_NORMAL)
1135 		assert(be != NULL);		/* should always be there */
1136 
1137 	if (be == NULL)
1138 		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
1139 
1140 	if (backend_panic_thread != 0)
1141 		backend_panic(NULL);		/* don't proceed */
1142 
1143 	ts = gethrtime();
1144 	vts = gethrvtime();
1145 	(void) pthread_mutex_lock(&be->be_lock);
1146 	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
1147 
1148 	if (backend_panic_thread != 0) {
1149 		(void) pthread_mutex_unlock(&be->be_lock);
1150 		backend_panic(NULL);		/* don't proceed */
1151 	}
1152 	be->be_thread = pthread_self();
1153 
1154 	if (be->be_readonly) {
1155 		int r;
1156 		assert(t == BACKEND_TYPE_NORMAL);
1157 
1158 		r = backend_check_readonly(be, writing, ts);
1159 		if (r != REP_PROTOCOL_SUCCESS) {
1160 			be->be_thread = 0;
1161 			(void) pthread_mutex_unlock(&be->be_lock);
1162 			return (r);
1163 		}
1164 	}
1165 
1166 	if (backend_do_trace)
1167 		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
1168 	else
1169 		(void) sqlite_trace(be->be_db, NULL, NULL);
1170 
1171 	be->be_writing = writing;
1172 	*bep = be;
1173 	return (REP_PROTOCOL_SUCCESS);
1174 }
1175 
1176 static void
1177 backend_unlock(sqlite_backend_t *be)
1178 {
1179 	be->be_writing = 0;
1180 	be->be_thread = 0;
1181 	(void) pthread_mutex_unlock(&be->be_lock);
1182 }
1183 
1184 static void
1185 backend_destroy(sqlite_backend_t *be)
1186 {
1187 	if (be->be_db != NULL) {
1188 		sqlite_close(be->be_db);
1189 		be->be_db = NULL;
1190 	}
1191 	be->be_thread = 0;
1192 	(void) pthread_mutex_unlock(&be->be_lock);
1193 	(void) pthread_mutex_destroy(&be->be_lock);
1194 }
1195 
1196 static void
1197 backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
1198 {
1199 	assert(MUTEX_HELD(&be->be_lock));
1200 	assert(be == &be_info[backend_id]);
1201 
1202 	bes[backend_id] = be;
1203 	(void) pthread_mutex_unlock(&be->be_lock);
1204 }
1205 
/*
 * Write the whole NUL-terminated string mess to fd, looping over partial
 * writes and restarting writes that were interrupted by a signal.
 *
 * Returns 0 once every byte has been written, or -1 (with errno set by
 * write(2)) on the first unrecoverable error.
 */
static int
backend_fd_write(int fd, const char *mess)
{
	size_t remaining = strlen(mess);

	while (remaining > 0) {
		ssize_t written = write(fd, mess, remaining);

		if (written < 0) {
			/* An interrupted write transferred nothing; retry. */
			if (errno == EINTR)
				continue;
			return (-1);
		}
		mess += written;
		remaining -= (size_t)written;
	}
	return (0);
}
1220 
1221 /*
1222  * Can return:
1223  *	_BAD_REQUEST		name is not valid
1224  *	_TRUNCATED		name is too long for current repository path
1225  *	_UNKNOWN		failed for unknown reason (details written to
1226  *				console)
1227  *	_BACKEND_READONLY	backend is not writable
1228  *	_NO_RESOURCES		out of memory
1229  *	_SUCCESS		Backup completed successfully.
1230  */
1231 rep_protocol_responseid_t
1232 backend_create_backup(const char *name)
1233 {
1234 	rep_protocol_responseid_t result;
1235 	sqlite_backend_t *be;
1236 
1237 	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
1238 	assert(result == REP_PROTOCOL_SUCCESS);
1239 
1240 	result = backend_create_backup_locked(be, name);
1241 	backend_unlock(be);
1242 
1243 	return (result);
1244 }
1245 
1246 /*
1247  * Copy the repository.  If the sw_back flag is not set, we are
1248  * copying the repository from the default location under /etc/svc to
1249  * the tmpfs /etc/svc/volatile location.  If the flag is set, we are
1250  * copying back to the /etc/svc location from the volatile location
1251  * after manifest-import is completed.
1252  *
1253  * Can return:
1254  *
1255  *	REP_PROTOCOL_SUCCESS		successful copy and rename
1256  *	REP_PROTOCOL_FAIL_UNKNOWN	file operation error
1257  *	REP_PROTOCOL_FAIL_NO_RESOURCES	out of memory
1258  */
1259 static rep_protocol_responseid_t
1260 backend_switch_copy(const char *src, const char *dst, int sw_back)
1261 {
1262 	int srcfd, dstfd;
1263 	char *tmppath = malloc(PATH_MAX);
1264 	rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
1265 	struct stat s_buf;
1266 	size_t cpsz, sz;
1267 
1268 	if (tmppath == NULL) {
1269 		res = REP_PROTOCOL_FAIL_NO_RESOURCES;
1270 		goto out;
1271 	}
1272 
1273 	/*
1274 	 * Create and open the related db files
1275 	 */
1276 	(void) strlcpy(tmppath, dst, PATH_MAX);
1277 	sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
1278 	assert(sz < PATH_MAX);
1279 	if (sz >= PATH_MAX) {
1280 		configd_critical(
1281 		    "Backend copy failed: strlcat %s: overflow\n", tmppath);
1282 		abort();
1283 	}
1284 
1285 	if ((dstfd = mkstemp(tmppath)) < 0) {
1286 		configd_critical("Backend copy failed: mkstemp %s: %s\n",
1287 		    tmppath, strerror(errno));
1288 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1289 		goto out;
1290 	}
1291 
1292 	if ((srcfd = open(src, O_RDONLY)) < 0) {
1293 		configd_critical("Backend copy failed: opening %s: %s\n",
1294 		    src, strerror(errno));
1295 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1296 		goto errexit;
1297 	}
1298 
1299 	/*
1300 	 * fstat the backend before copy for sanity check.
1301 	 */
1302 	if (fstat(srcfd, &s_buf) < 0) {
1303 		configd_critical("Backend copy failed: fstat %s: %s\n",
1304 		    src, strerror(errno));
1305 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1306 		goto errexit;
1307 	}
1308 
1309 	if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
1310 	    REP_PROTOCOL_SUCCESS)
1311 		goto errexit;
1312 
1313 	if (cpsz != s_buf.st_size) {
1314 		configd_critical("Backend copy failed: incomplete copy\n");
1315 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1316 		goto errexit;
1317 	}
1318 
1319 	/*
1320 	 * Rename tmppath to dst
1321 	 */
1322 	if (rename(tmppath, dst) < 0) {
1323 		configd_critical(
1324 		    "Backend copy failed: rename %s to %s: %s\n",
1325 		    tmppath, dst, strerror(errno));
1326 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1327 	}
1328 
1329 errexit:
1330 	if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
1331 		configd_critical(
1332 		    "Backend copy failed: remove %s: %s\n",
1333 		    tmppath, strerror(errno));
1334 
1335 	(void) close(srcfd);
1336 	(void) close(dstfd);
1337 
1338 out:
1339 	free(tmppath);
1340 	if (sw_back) {
1341 		if (unlink(src) < 0)
1342 			configd_critical(
1343 			    "Backend copy failed: remove %s: %s\n",
1344 			    src, strerror(errno));
1345 	}
1346 
1347 	return (res);
1348 }
1349 
1350 /*
1351  * Perform sanity check on the repository.
1352  * Return 0 if check succeeds or -1 if fails.
1353  */
1354 static int
1355 backend_switch_check(struct sqlite *be_db, char **errp)
1356 {
1357 	struct run_single_int_info info;
1358 	uint32_t val = -1UL;
1359 	int r;
1360 
1361 	info.rs_out = &val;
1362 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1363 
1364 	r = sqlite_exec(be_db,
1365 	    "SELECT schema_version FROM schema_version;",
1366 	    run_single_int_callback, &info, errp);
1367 
1368 	if (r == SQLITE_OK &&
1369 	    info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
1370 	    val == BACKEND_SCHEMA_VERSION)
1371 		return (0);
1372 	else
1373 		return (-1);
1374 }
1375 
1376 /*
1377  * Backend switch entry point.  It is called to perform the backend copy and
1378  * switch from src to dst.  First, it blocks all other clients from accessing
1379  * the repository by calling backend_lock to lock the repository.  Upon
1380  * successful lock, copying and switching of the repository are performed.
1381  *
1382  * Can return:
1383  *	REP_PROTOCOL_SUCCESS			successful switch
1384  *	REP_PROTOCOL_FAIL_BACKEND_ACCESS	backen access fails
1385  *	REP_PROTOCOL_FAIL_BACKEND_READONLY	backend is not writable
1386  *	REP_PROTOCOL_FAIL_UNKNOWN		file operation error
1387  *	REP_PROTOCOL_FAIL_NO_RESOURCES		out of memory
1388  */
1389 rep_protocol_responseid_t
1390 backend_switch(int sw_back)
1391 {
1392 	rep_protocol_responseid_t result;
1393 	sqlite_backend_t *be;
1394 	struct sqlite *new;
1395 	char *errp;
1396 	const char *dst;
1397 
1398 	result = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
1399 	if (result != REP_PROTOCOL_SUCCESS)
1400 		return (result);
1401 
1402 	if (sw_back) {
1403 		dst = REPOSITORY_DB;
1404 	} else {
1405 		dst = FAST_REPOSITORY_DB;
1406 	}
1407 
1408 	/*
1409 	 * Do the actual copy and rename
1410 	 */
1411 	result = backend_switch_copy(be->be_path, dst, sw_back);
1412 	if (result != REP_PROTOCOL_SUCCESS) {
1413 		goto errout;
1414 	}
1415 
1416 	/*
1417 	 * Do the backend sanity check and switch
1418 	 */
1419 	new = sqlite_open(dst, 0600, &errp);
1420 	if (new != NULL) {
1421 		/*
1422 		 * Sanity check
1423 		 */
1424 		if (backend_switch_check(new, &errp) == 0) {
1425 			free((char *)be->be_path);
1426 			be->be_path = strdup(dst);
1427 			if (be->be_path == NULL) {
1428 				configd_critical(
1429 				    "Backend switch failed: strdup %s: %s\n",
1430 				    dst, strerror(errno));
1431 				result = REP_PROTOCOL_FAIL_NO_RESOURCES;
1432 				sqlite_close(new);
1433 			} else {
1434 				sqlite_close(be->be_db);
1435 				be->be_db = new;
1436 			}
1437 		} else {
1438 			configd_critical(
1439 			    "Backend switch failed: integrity check %s: %s\n",
1440 			    dst, errp);
1441 			result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1442 		}
1443 	} else {
1444 		configd_critical("Backend switch failed: sqlite_open %s: %s\n",
1445 		    dst, errp);
1446 		result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1447 	}
1448 
1449 errout:
1450 	backend_unlock(be);
1451 	return (result);
1452 }
1453 
1454 /*
1455  * This routine is called to attempt the recovery of
1456  * the most recent valid repository if possible when configd
1457  * is restarted for some reasons or when system crashes
1458  * during the switch operation.  The repository databases
1459  * referenced here are indicators of successful switch
1460  * operations.
1461  */
1462 static void
1463 backend_switch_recovery(void)
1464 {
1465 	const char *fast_db = FAST_REPOSITORY_DB;
1466 	char *errp;
1467 	struct stat s_buf;
1468 	struct sqlite *be_db;
1469 
1470 
1471 	/*
1472 	 * A good transient db containing most recent data can
1473 	 * exist if system or svc.configd crashes during the
1474 	 * switch operation.  If that is the case, check its
1475 	 * integrity and use it.
1476 	 */
1477 	if (stat(fast_db, &s_buf) < 0) {
1478 		return;
1479 	}
1480 
1481 	/*
1482 	 * Do sanity check on the db
1483 	 */
1484 	be_db = sqlite_open(fast_db, 0600, &errp);
1485 
1486 	if (be_db != NULL) {
1487 		if (backend_switch_check(be_db, &errp) == 0)
1488 			(void) backend_switch_copy(fast_db, REPOSITORY_DB, 1);
1489 	}
1490 
1491 	(void) unlink(fast_db);
1492 }
1493 
1494 /*ARGSUSED*/
1495 static int
1496 backend_integrity_callback(void *private, int narg, char **vals, char **cols)
1497 {
1498 	char **out = private;
1499 	char *old = *out;
1500 	char *new;
1501 	const char *info;
1502 	size_t len;
1503 	int x;
1504 
1505 	for (x = 0; x < narg; x++) {
1506 		if ((info = vals[x]) != NULL &&
1507 		    strcmp(info, "ok") != 0) {
1508 			len = (old == NULL)? 0 : strlen(old);
1509 			len += strlen(info) + 2;	/* '\n' + '\0' */
1510 
1511 			new = realloc(old, len);
1512 			if (new == NULL)
1513 				return (BACKEND_CALLBACK_ABORT);
1514 			if (old == NULL)
1515 				new[0] = 0;
1516 			old = *out = new;
1517 			(void) strlcat(new, info, len);
1518 			(void) strlcat(new, "\n", len);
1519 		}
1520 	}
1521 	return (BACKEND_CALLBACK_CONTINUE);
1522 }
1523 
1524 #define	BACKEND_CREATE_LOCKED		-2
1525 #define	BACKEND_CREATE_FAIL		-1
1526 #define	BACKEND_CREATE_SUCCESS		0
1527 #define	BACKEND_CREATE_READONLY		1
1528 #define	BACKEND_CREATE_NEED_INIT	2
1529 static int
1530 backend_create(backend_type_t backend_id, const char *db_file,
1531     sqlite_backend_t **bep)
1532 {
1533 	char *errp;
1534 	char *integrity_results = NULL;
1535 	sqlite_backend_t *be;
1536 	int r;
1537 	uint32_t val = -1UL;
1538 	struct run_single_int_info info;
1539 	int fd;
1540 
1541 	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);
1542 
1543 	be = &be_info[backend_id];
1544 
1545 	assert(be->be_db == NULL);
1546 
1547 	(void) pthread_mutex_init(&be->be_lock, NULL);
1548 	(void) pthread_mutex_lock(&be->be_lock);
1549 
1550 	be->be_type = backend_id;
1551 	be->be_path = strdup(db_file);
1552 	if (be->be_path == NULL) {
1553 		perror("malloc");
1554 		goto fail;
1555 	}
1556 
1557 	be->be_db = sqlite_open(be->be_path, 0600, &errp);
1558 
1559 	if (be->be_db == NULL) {
1560 		if (strstr(errp, "out of memory") != NULL) {
1561 			configd_critical("%s: %s\n", db_file, errp);
1562 			free(errp);
1563 
1564 			goto fail;
1565 		}
1566 
1567 		/* report it as an integrity failure */
1568 		integrity_results = errp;
1569 		errp = NULL;
1570 		goto integrity_fail;
1571 	}
1572 
1573 	/*
1574 	 * check if we are inited and of the correct schema version
1575 	 *
1576 	 */
1577 	info.rs_out = &val;
1578 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1579 
1580 	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
1581 	    run_single_int_callback, &info, &errp);
1582 	if (r == SQLITE_ERROR &&
1583 	    strcmp("no such table: schema_version", errp) == 0) {
1584 		free(errp);
1585 		/*
1586 		 * Could be an empty repository, could be pre-schema_version
1587 		 * schema.  Check for id_tbl, which has always been there.
1588 		 */
1589 		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
1590 		    NULL, NULL, &errp);
1591 		if (r == SQLITE_ERROR &&
1592 		    strcmp("no such table: id_tbl", errp) == 0) {
1593 			free(errp);
1594 			*bep = be;
1595 			return (BACKEND_CREATE_NEED_INIT);
1596 		}
1597 
1598 		configd_critical("%s: schema version mismatch\n", db_file);
1599 		goto fail;
1600 	}
1601 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1602 		free(errp);
1603 		*bep = NULL;
1604 		backend_destroy(be);
1605 		return (BACKEND_CREATE_LOCKED);
1606 	}
1607 	if (r == SQLITE_OK) {
1608 		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
1609 		    val != BACKEND_SCHEMA_VERSION) {
1610 			configd_critical("%s: schema version mismatch\n",
1611 			    db_file);
1612 			goto fail;
1613 		}
1614 	}
1615 
1616 	/*
1617 	 * pull in the whole database sequentially.
1618 	 */
1619 	if ((fd = open(db_file, O_RDONLY)) >= 0) {
1620 		size_t sz = 64 * 1024;
1621 		char *buffer = malloc(sz);
1622 		if (buffer != NULL) {
1623 			while (read(fd, buffer, sz) > 0)
1624 				;
1625 			free(buffer);
1626 		}
1627 		(void) close(fd);
1628 	}
1629 
1630 	/*
1631 	 * run an integrity check
1632 	 */
1633 	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
1634 	    backend_integrity_callback, &integrity_results, &errp);
1635 
1636 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1637 		free(errp);
1638 		*bep = NULL;
1639 		backend_destroy(be);
1640 		return (BACKEND_CREATE_LOCKED);
1641 	}
1642 	if (r == SQLITE_ABORT) {
1643 		free(errp);
1644 		errp = NULL;
1645 		integrity_results = "out of memory running integrity check\n";
1646 	} else if (r != SQLITE_OK && integrity_results == NULL) {
1647 		integrity_results = errp;
1648 		errp = NULL;
1649 	}
1650 
1651 integrity_fail:
1652 	if (integrity_results != NULL) {
1653 		const char *fname = "/etc/svc/volatile/db_errors";
1654 		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
1655 			fname = NULL;
1656 		} else {
1657 			if (backend_fd_write(fd, "\n\n") < 0 ||
1658 			    backend_fd_write(fd, db_file) < 0 ||
1659 			    backend_fd_write(fd,
1660 			    ": PRAGMA integrity_check; failed.  Results:\n") <
1661 			    0 || backend_fd_write(fd, integrity_results) < 0 ||
1662 			    backend_fd_write(fd, "\n\n") < 0) {
1663 				fname = NULL;
1664 			}
1665 			(void) close(fd);
1666 		}
1667 
1668 		if (!is_main_repository ||
1669 		    backend_id == BACKEND_TYPE_NONPERSIST) {
1670 			if (fname != NULL)
1671 				configd_critical(
1672 				    "%s: integrity check failed. Details in "
1673 				    "%s\n", db_file, fname);
1674 			else
1675 				configd_critical(
1676 				    "%s: integrity check failed.\n",
1677 				    db_file);
1678 		} else {
1679 			(void) fprintf(stderr,
1680 "\n"
1681 "svc.configd: smf(5) database integrity check of:\n"
1682 "\n"
1683 "    %s\n"
1684 "\n"
1685 "  failed. The database might be damaged or a media error might have\n"
1686 "  prevented it from being verified.  Additional information useful to\n"
1687 "  your service provider%s%s\n"
1688 "\n"
1689 "  The system will not be able to boot until you have restored a working\n"
1690 "  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
1691 "  purposes.  The command:\n"
1692 "\n"
1693 "    /lib/svc/bin/restore_repository\n"
1694 "\n"
1695 "  can be run to restore a backup version of your repository.  See\n"
1696 "  http://sun.com/msg/SMF-8000-MY for more information.\n"
1697 "\n",
1698 			    db_file,
1699 			    (fname == NULL)? ":\n\n" : " is in:\n\n    ",
1700 			    (fname == NULL)? integrity_results : fname);
1701 		}
1702 		free(errp);
1703 		goto fail;
1704 	}
1705 
1706 	/*
1707 	 * Simply do check if backend has been upgraded.  We do not wish
1708 	 * to actually carry out upgrade here - the main repository may
1709 	 * not be writable at this point.  Actual upgrade is carried out
1710 	 * via backend_check_readonly().  This check is done so that
1711 	 * we determine repository state - upgraded or not - and then
1712 	 * the appropriate SELECT statement (value-ordered or not)
1713 	 * can be used when retrieving property values early in boot.
1714 	 */
1715 	if (backend_id == BACKEND_TYPE_NORMAL)
1716 		backend_check_upgrade(be, B_FALSE);
1717 	/*
1718 	 * check if we are writable
1719 	 */
1720 	r = backend_is_readonly(be->be_db, be->be_path);
1721 
1722 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1723 		free(errp);
1724 		*bep = NULL;
1725 		backend_destroy(be);
1726 		return (BACKEND_CREATE_LOCKED);
1727 	}
1728 	if (r != SQLITE_OK && r != SQLITE_FULL) {
1729 		free(errp);
1730 		be->be_readonly = 1;
1731 		*bep = be;
1732 		return (BACKEND_CREATE_READONLY);
1733 	}
1734 
1735 	*bep = be;
1736 	return (BACKEND_CREATE_SUCCESS);
1737 
1738 fail:
1739 	*bep = NULL;
1740 	backend_destroy(be);
1741 	return (BACKEND_CREATE_FAIL);
1742 }
1743 
/*
 * Round arg up to the nearest power of two (a power of two is returned
 * unchanged).
 *
 * (arg & -arg) is, through the magic of twos-complement arithmetic, the
 * lowest set bit in arg.  Adding that bit to arg clears it and carries
 * upward, so repeating the step until a single bit remains gives the
 * result.
 */
static size_t
round_up_to_p2(size_t arg)
{
	size_t lowbit;

	/*
	 * Don't allow a zero result.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		lowbit = arg & -arg;
		if (lowbit == arg)
			break;		/* only one bit is left */
		arg += lowbit;
	}

	return (arg);
}
1761 
1762 /*
1763  * Returns
1764  *   _NO_RESOURCES - out of memory
1765  *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
1766  *   _DONE - callback aborted query
1767  *   _SUCCESS
1768  */
1769 int
1770 backend_run(backend_type_t t, backend_query_t *q,
1771     backend_run_callback_f *cb, void *data)
1772 {
1773 	char *errmsg = NULL;
1774 	int ret;
1775 	sqlite_backend_t *be;
1776 	hrtime_t ts, vts;
1777 
1778 	if (q == NULL || q->bq_buf == NULL)
1779 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1780 
1781 	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
1782 		return (ret);
1783 
1784 	ts = gethrtime();
1785 	vts = gethrvtime();
1786 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1787 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1788 	ret = backend_error(be, ret, errmsg);
1789 	backend_unlock(be);
1790 
1791 	return (ret);
1792 }
1793 
1794 /*
1795  * Starts a "read-only" transaction -- i.e., locks out writers as long
1796  * as it is active.
1797  *
1798  * Fails with
1799  *   _NO_RESOURCES - out of memory
1800  *
1801  * If t is not _NORMAL, can also fail with
1802  *   _BACKEND_ACCESS - backend does not exist
1803  *
1804  * If writable is true, can also fail with
1805  *   _BACKEND_READONLY
1806  */
1807 static int
1808 backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
1809 {
1810 	backend_tx_t *ret;
1811 	sqlite_backend_t *be;
1812 	int r;
1813 
1814 	*txp = NULL;
1815 
1816 	ret = uu_zalloc(sizeof (*ret));
1817 	if (ret == NULL)
1818 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1819 
1820 	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
1821 		uu_free(ret);
1822 		return (r);
1823 	}
1824 
1825 	ret->bt_be = be;
1826 	ret->bt_readonly = !writable;
1827 	ret->bt_type = t;
1828 	ret->bt_full = 0;
1829 
1830 	*txp = ret;
1831 	return (REP_PROTOCOL_SUCCESS);
1832 }
1833 
1834 int
1835 backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
1836 {
1837 	return (backend_tx_begin_common(t, txp, 0));
1838 }
1839 
1840 static void
1841 backend_tx_end(backend_tx_t *tx)
1842 {
1843 	sqlite_backend_t *be;
1844 
1845 	be = tx->bt_be;
1846 
1847 	if (tx->bt_full) {
1848 		struct sqlite *new;
1849 
1850 		/*
1851 		 * sqlite tends to be sticky with SQLITE_FULL, so we try
1852 		 * to get a fresh database handle if we got a FULL warning
1853 		 * along the way.  If that fails, no harm done.
1854 		 */
1855 		new = sqlite_open(be->be_path, 0600, NULL);
1856 		if (new != NULL) {
1857 			sqlite_close(be->be_db);
1858 			be->be_db = new;
1859 		}
1860 	}
1861 	backend_unlock(be);
1862 	tx->bt_be = NULL;
1863 	uu_free(tx);
1864 }
1865 
1866 void
1867 backend_tx_end_ro(backend_tx_t *tx)
1868 {
1869 	assert(tx->bt_readonly);
1870 	backend_tx_end(tx);
1871 }
1872 
1873 /*
1874  * Fails with
1875  *   _NO_RESOURCES - out of memory
1876  *   _BACKEND_ACCESS
1877  *   _BACKEND_READONLY
1878  */
1879 int
1880 backend_tx_begin(backend_type_t t, backend_tx_t **txp)
1881 {
1882 	int r;
1883 	char *errmsg;
1884 	hrtime_t ts, vts;
1885 
1886 	r = backend_tx_begin_common(t, txp, 1);
1887 	if (r != REP_PROTOCOL_SUCCESS)
1888 		return (r);
1889 
1890 	ts = gethrtime();
1891 	vts = gethrvtime();
1892 	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
1893 	    &errmsg);
1894 	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
1895 	if (r == SQLITE_FULL)
1896 		(*txp)->bt_full = 1;
1897 	r = backend_error((*txp)->bt_be, r, errmsg);
1898 
1899 	if (r != REP_PROTOCOL_SUCCESS) {
1900 		assert(r != REP_PROTOCOL_DONE);
1901 		(void) sqlite_exec((*txp)->bt_be->be_db,
1902 		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
1903 		backend_tx_end(*txp);
1904 		*txp = NULL;
1905 		return (r);
1906 	}
1907 
1908 	(*txp)->bt_readonly = 0;
1909 
1910 	return (REP_PROTOCOL_SUCCESS);
1911 }
1912 
1913 void
1914 backend_tx_rollback(backend_tx_t *tx)
1915 {
1916 	int r;
1917 	char *errmsg;
1918 	sqlite_backend_t *be;
1919 	hrtime_t ts, vts;
1920 
1921 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1922 	be = tx->bt_be;
1923 
1924 	ts = gethrtime();
1925 	vts = gethrvtime();
1926 	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1927 	    &errmsg);
1928 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1929 	if (r == SQLITE_FULL)
1930 		tx->bt_full = 1;
1931 	(void) backend_error(be, r, errmsg);
1932 
1933 	backend_tx_end(tx);
1934 }
1935 
1936 /*
1937  * Fails with
1938  *   _NO_RESOURCES - out of memory
1939  */
1940 int
1941 backend_tx_commit(backend_tx_t *tx)
1942 {
1943 	int r, r2;
1944 	char *errmsg;
1945 	sqlite_backend_t *be;
1946 	hrtime_t ts, vts;
1947 
1948 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1949 	be = tx->bt_be;
1950 	ts = gethrtime();
1951 	vts = gethrvtime();
1952 	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
1953 	    &errmsg);
1954 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1955 	if (r == SQLITE_FULL)
1956 		tx->bt_full = 1;
1957 
1958 	r = backend_error(be, r, errmsg);
1959 	assert(r != REP_PROTOCOL_DONE);
1960 
1961 	if (r != REP_PROTOCOL_SUCCESS) {
1962 		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1963 		    &errmsg);
1964 		r2 = backend_error(be, r2, errmsg);
1965 		if (r2 != REP_PROTOCOL_SUCCESS)
1966 			backend_panic("cannot rollback failed commit");
1967 
1968 		backend_tx_end(tx);
1969 		return (r);
1970 	}
1971 	backend_tx_end(tx);
1972 	return (REP_PROTOCOL_SUCCESS);
1973 }
1974 
1975 static const char *
1976 id_space_to_name(enum id_space id)
1977 {
1978 	switch (id) {
1979 	case BACKEND_ID_SERVICE_INSTANCE:
1980 		return ("SI");
1981 	case BACKEND_ID_PROPERTYGRP:
1982 		return ("PG");
1983 	case BACKEND_ID_GENERATION:
1984 		return ("GEN");
1985 	case BACKEND_ID_PROPERTY:
1986 		return ("PROP");
1987 	case BACKEND_ID_VALUE:
1988 		return ("VAL");
1989 	case BACKEND_ID_SNAPNAME:
1990 		return ("SNAME");
1991 	case BACKEND_ID_SNAPSHOT:
1992 		return ("SHOT");
1993 	case BACKEND_ID_SNAPLEVEL:
1994 		return ("SLVL");
1995 	default:
1996 		abort();
1997 		/*NOTREACHED*/
1998 	}
1999 }
2000 
2001 /*
2002  * Returns a new id or 0 if the id argument is invalid or the query fails.
2003  */
2004 uint32_t
2005 backend_new_id(backend_tx_t *tx, enum id_space id)
2006 {
2007 	struct run_single_int_info info;
2008 	uint32_t new_id = 0;
2009 	const char *name = id_space_to_name(id);
2010 	char *errmsg;
2011 	int ret;
2012 	sqlite_backend_t *be;
2013 	hrtime_t ts, vts;
2014 
2015 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2016 	be = tx->bt_be;
2017 
2018 	info.rs_out = &new_id;
2019 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2020 
2021 	ts = gethrtime();
2022 	vts = gethrvtime();
2023 	ret = sqlite_exec_printf(be->be_db,
2024 	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
2025 	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
2026 	    run_single_int_callback, &info, &errmsg, name, name);
2027 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2028 	if (ret == SQLITE_FULL)
2029 		tx->bt_full = 1;
2030 
2031 	ret = backend_error(be, ret, errmsg);
2032 
2033 	if (ret != REP_PROTOCOL_SUCCESS) {
2034 		return (0);
2035 	}
2036 
2037 	return (new_id);
2038 }
2039 
2040 /*
2041  * Returns
2042  *   _NO_RESOURCES - out of memory
2043  *   _DONE - callback aborted query
2044  *   _SUCCESS
2045  */
2046 int
2047 backend_tx_run(backend_tx_t *tx, backend_query_t *q,
2048     backend_run_callback_f *cb, void *data)
2049 {
2050 	char *errmsg = NULL;
2051 	int ret;
2052 	sqlite_backend_t *be;
2053 	hrtime_t ts, vts;
2054 
2055 	assert(tx != NULL && tx->bt_be != NULL);
2056 	be = tx->bt_be;
2057 
2058 	if (q == NULL || q->bq_buf == NULL)
2059 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2060 
2061 	ts = gethrtime();
2062 	vts = gethrvtime();
2063 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2064 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2065 	if (ret == SQLITE_FULL)
2066 		tx->bt_full = 1;
2067 	ret = backend_error(be, ret, errmsg);
2068 
2069 	return (ret);
2070 }
2071 
2072 /*
2073  * Returns
2074  *   _NO_RESOURCES - out of memory
2075  *   _NOT_FOUND - the query returned no results
2076  *   _SUCCESS - the query returned a single integer
2077  */
2078 int
2079 backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
2080 {
2081 	struct run_single_int_info info;
2082 	int ret;
2083 
2084 	info.rs_out = buf;
2085 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2086 
2087 	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
2088 	assert(ret != REP_PROTOCOL_DONE);
2089 
2090 	if (ret != REP_PROTOCOL_SUCCESS)
2091 		return (ret);
2092 
2093 	return (info.rs_result);
2094 }
2095 
2096 /*
2097  * Fails with
2098  *   _NO_RESOURCES - out of memory
2099  */
2100 int
2101 backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
2102 {
2103 	va_list a;
2104 	char *errmsg;
2105 	int ret;
2106 	sqlite_backend_t *be;
2107 	hrtime_t ts, vts;
2108 
2109 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2110 	be = tx->bt_be;
2111 
2112 	va_start(a, format);
2113 	ts = gethrtime();
2114 	vts = gethrvtime();
2115 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2116 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2117 	if (ret == SQLITE_FULL)
2118 		tx->bt_full = 1;
2119 	va_end(a);
2120 	ret = backend_error(be, ret, errmsg);
2121 	assert(ret != REP_PROTOCOL_DONE);
2122 
2123 	return (ret);
2124 }
2125 
2126 /*
2127  * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
2128  */
2129 int
2130 backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
2131 {
2132 	va_list a;
2133 	char *errmsg;
2134 	int ret;
2135 	sqlite_backend_t *be;
2136 	hrtime_t ts, vts;
2137 
2138 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2139 	be = tx->bt_be;
2140 
2141 	va_start(a, format);
2142 	ts = gethrtime();
2143 	vts = gethrvtime();
2144 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2145 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2146 	if (ret == SQLITE_FULL)
2147 		tx->bt_full = 1;
2148 	va_end(a);
2149 
2150 	ret = backend_error(be, ret, errmsg);
2151 
2152 	return (ret);
2153 }
2154 
2155 #define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
2156 	(backend_add_schema((be), (file), \
2157 	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
2158 	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
2159 
2160 static int
2161 backend_add_schema(sqlite_backend_t *be, const char *file,
2162     struct backend_tbl_info *tbls, int tbl_count,
2163     struct backend_idx_info *idxs, int idx_count)
2164 {
2165 	int i;
2166 	char *errmsg;
2167 	int ret;
2168 
2169 	/*
2170 	 * Create the tables.
2171 	 */
2172 	for (i = 0; i < tbl_count; i++) {
2173 		if (tbls[i].bti_name == NULL) {
2174 			assert(i + 1 == tbl_count);
2175 			break;
2176 		}
2177 		ret = sqlite_exec_printf(be->be_db,
2178 		    "CREATE TABLE %s (%s);\n",
2179 		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
2180 
2181 		if (ret != SQLITE_OK) {
2182 			configd_critical(
2183 			    "%s: %s table creation fails: %s\n", file,
2184 			    tbls[i].bti_name, errmsg);
2185 			free(errmsg);
2186 			return (-1);
2187 		}
2188 	}
2189 
2190 	/*
2191 	 * Make indices on key tables and columns.
2192 	 */
2193 	for (i = 0; i < idx_count; i++) {
2194 		if (idxs[i].bxi_tbl == NULL) {
2195 			assert(i + 1 == idx_count);
2196 			break;
2197 		}
2198 
2199 		ret = sqlite_exec_printf(be->be_db,
2200 		    "CREATE INDEX %s_%s ON %s (%s);\n",
2201 		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
2202 		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
2203 
2204 		if (ret != SQLITE_OK) {
2205 			configd_critical(
2206 			    "%s: %s_%s index creation fails: %s\n", file,
2207 			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
2208 			free(errmsg);
2209 			return (-1);
2210 		}
2211 	}
2212 	return (0);
2213 }
2214 
2215 static int
2216 backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
2217 {
2218 	int i;
2219 	char *errmsg;
2220 	int ret;
2221 
2222 	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
2223 
2224 	if (t == BACKEND_TYPE_NORMAL) {
2225 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
2226 	} else if (t == BACKEND_TYPE_NONPERSIST) {
2227 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
2228 	} else {
2229 		abort();		/* can't happen */
2230 	}
2231 
2232 	if (ret < 0) {
2233 		return (ret);
2234 	}
2235 
2236 	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
2237 	if (ret < 0) {
2238 		return (ret);
2239 	}
2240 
2241 	/*
2242 	 * Add the schema version to the table
2243 	 */
2244 	ret = sqlite_exec_printf(be->be_db,
2245 	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
2246 	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
2247 	if (ret != SQLITE_OK) {
2248 		configd_critical(
2249 		    "setting schema version fails: %s\n", errmsg);
2250 		free(errmsg);
2251 	}
2252 
2253 	/*
2254 	 * Populate id_tbl with initial IDs.
2255 	 */
2256 	for (i = 0; i < BACKEND_ID_INVALID; i++) {
2257 		const char *name = id_space_to_name(i);
2258 
2259 		ret = sqlite_exec_printf(be->be_db,
2260 		    "INSERT INTO id_tbl (id_name, id_next) "
2261 		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
2262 		if (ret != SQLITE_OK) {
2263 			configd_critical(
2264 			    "id insertion for %s fails: %s\n", name, errmsg);
2265 			free(errmsg);
2266 			return (-1);
2267 		}
2268 	}
2269 	/*
2270 	 * Set the persistance of the database.  The normal database is marked
2271 	 * "synchronous", so that all writes are synchronized to stable storage
2272 	 * before proceeding.
2273 	 */
2274 	ret = sqlite_exec_printf(be->be_db,
2275 	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
2276 	    NULL, NULL, &errmsg,
2277 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
2278 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
2279 	if (ret != SQLITE_OK) {
2280 		configd_critical("pragma setting fails: %s\n", errmsg);
2281 		free(errmsg);
2282 		return (-1);
2283 	}
2284 
2285 	return (0);
2286 }
2287 
2288 int
2289 backend_init(const char *db_file, const char *npdb_file, int have_np)
2290 {
2291 	sqlite_backend_t *be;
2292 	int r;
2293 	int writable_persist = 1;
2294 
2295 	/* set up our temporary directory */
2296 	sqlite_temp_directory = "/etc/svc/volatile";
2297 
2298 	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
2299 		configd_critical("Mismatched link!  (%s should be %s)\n",
2300 		    sqlite_version, SQLITE_VERSION);
2301 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2302 	}
2303 
2304 	/*
2305 	 * If the system crashed during a backend switch, there might
2306 	 * be a leftover transient database which contains useful
2307 	 * information which can be used for recovery.
2308 	 */
2309 	backend_switch_recovery();
2310 
2311 	if (db_file == NULL)
2312 		db_file = REPOSITORY_DB;
2313 	if (strcmp(db_file, REPOSITORY_DB) != 0) {
2314 		is_main_repository = 0;
2315 	}
2316 
2317 	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
2318 	switch (r) {
2319 	case BACKEND_CREATE_FAIL:
2320 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2321 	case BACKEND_CREATE_LOCKED:
2322 		return (CONFIGD_EXIT_DATABASE_LOCKED);
2323 	case BACKEND_CREATE_SUCCESS:
2324 		break;		/* success */
2325 	case BACKEND_CREATE_READONLY:
2326 		writable_persist = 0;
2327 		break;
2328 	case BACKEND_CREATE_NEED_INIT:
2329 		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
2330 			backend_destroy(be);
2331 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2332 		}
2333 		break;
2334 	default:
2335 		abort();
2336 		/*NOTREACHED*/
2337 	}
2338 	backend_create_finish(BACKEND_TYPE_NORMAL, be);
2339 
2340 	if (have_np) {
2341 		if (npdb_file == NULL)
2342 			npdb_file = NONPERSIST_DB;
2343 
2344 		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
2345 		switch (r) {
2346 		case BACKEND_CREATE_SUCCESS:
2347 			break;		/* success */
2348 		case BACKEND_CREATE_FAIL:
2349 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2350 		case BACKEND_CREATE_LOCKED:
2351 			return (CONFIGD_EXIT_DATABASE_LOCKED);
2352 		case BACKEND_CREATE_READONLY:
2353 			configd_critical("%s: unable to write\n", npdb_file);
2354 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2355 		case BACKEND_CREATE_NEED_INIT:
2356 			if (backend_init_schema(be, db_file,
2357 			    BACKEND_TYPE_NONPERSIST)) {
2358 				backend_destroy(be);
2359 				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2360 			}
2361 			break;
2362 		default:
2363 			abort();
2364 			/*NOTREACHED*/
2365 		}
2366 		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
2367 
2368 		/*
2369 		 * If we started up with a writable filesystem, but the
2370 		 * non-persistent database needed initialization, we
2371 		 * are booting a non-global zone, so do a backup.
2372 		 */
2373 		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
2374 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2375 		    REP_PROTOCOL_SUCCESS) {
2376 			if (backend_create_backup_locked(be,
2377 			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
2378 				configd_critical(
2379 				    "unable to create \"%s\" backup of "
2380 				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2381 				    be->be_path);
2382 			}
2383 			backend_unlock(be);
2384 		}
2385 	}
2386 
2387 	/*
2388 	 * If the persistent backend is writable at this point, upgrade it.
2389 	 * This can occur in a few cases, most notably on UFS roots if
2390 	 * we are operating on the backend from another root, as is the case
2391 	 * during alternate-root BFU.
2392 	 *
2393 	 * Otherwise, upgrade will occur via backend_check_readonly() when
2394 	 * the repository is re-opened read-write.
2395 	 */
2396 	if (writable_persist) {
2397 		r = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
2398 		assert(r == REP_PROTOCOL_SUCCESS);
2399 		backend_check_upgrade(be, B_TRUE);
2400 		backend_unlock(be);
2401 	}
2402 
2403 	return (CONFIGD_EXIT_OKAY);
2404 }
2405 
2406 /*
2407  * quiesce all database activity prior to exiting
2408  */
2409 void
2410 backend_fini(void)
2411 {
2412 	sqlite_backend_t *be_normal, *be_np;
2413 
2414 	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
2415 	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
2416 }
2417 
2418 #define	QUERY_BASE	128
2419 backend_query_t *
2420 backend_query_alloc(void)
2421 {
2422 	backend_query_t *q;
2423 	q = calloc(1, sizeof (backend_query_t));
2424 	if (q != NULL) {
2425 		q->bq_size = QUERY_BASE;
2426 		q->bq_buf = calloc(1, q->bq_size);
2427 		if (q->bq_buf == NULL) {
2428 			q->bq_size = 0;
2429 		}
2430 
2431 	}
2432 	return (q);
2433 }
2434 
2435 void
2436 backend_query_append(backend_query_t *q, const char *value)
2437 {
2438 	char *alloc;
2439 	int count;
2440 	size_t size, old_len;
2441 
2442 	if (q == NULL) {
2443 		/* We'll discover the error when we try to run the query. */
2444 		return;
2445 	}
2446 
2447 	while (q->bq_buf != NULL) {
2448 		old_len = strlen(q->bq_buf);
2449 		size = q->bq_size;
2450 		count = strlcat(q->bq_buf, value, size);
2451 
2452 		if (count < size)
2453 			break;				/* success */
2454 
2455 		q->bq_buf[old_len] = 0;
2456 		size = round_up_to_p2(count + 1);
2457 
2458 		assert(size > q->bq_size);
2459 		alloc = realloc(q->bq_buf, size);
2460 		if (alloc == NULL) {
2461 			free(q->bq_buf);
2462 			q->bq_buf = NULL;
2463 			break;				/* can't grow */
2464 		}
2465 
2466 		q->bq_buf = alloc;
2467 		q->bq_size = size;
2468 	}
2469 }
2470 
2471 void
2472 backend_query_add(backend_query_t *q, const char *format, ...)
2473 {
2474 	va_list args;
2475 	char *new;
2476 
2477 	if (q == NULL || q->bq_buf == NULL)
2478 		return;
2479 
2480 	va_start(args, format);
2481 	new = sqlite_vmprintf(format, args);
2482 	va_end(args);
2483 
2484 	if (new == NULL) {
2485 		free(q->bq_buf);
2486 		q->bq_buf = NULL;
2487 		return;
2488 	}
2489 
2490 	backend_query_append(q, new);
2491 
2492 	free(new);
2493 }
2494 
2495 void
2496 backend_query_free(backend_query_t *q)
2497 {
2498 	if (q != NULL) {
2499 		if (q->bq_buf != NULL) {
2500 			free(q->bq_buf);
2501 		}
2502 		free(q);
2503 	}
2504 }
2505