xref: /illumos-gate/usr/src/cmd/svc/configd/backend.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <door.h>
31 #include <dirent.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <limits.h>
35 #include <pthread.h>
36 #include <stdarg.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <zone.h>
42 
43 #include "configd.h"
44 #include "repcache_protocol.h"
45 
46 #include "sqlite/sqlite.h"
47 #include "sqlite/sqlite-misc.h"
48 
49 /*
50  * This file has two purposes:
51  *
52  * 1. It contains the database schema, and the code for setting up our backend
53  *    databases, including installing said schema.
54  *
55  * 2. It provides a simplified interface to the SQL database library, and
56  *    synchronizes MT access to the database.
57  */
58 
/*
 * Accumulated cost of one class of backend activity.  The fields are
 * updated by UPDATE_TOTALS_WR().
 */
typedef struct backend_spent {
	uint64_t bs_count;	/* number of samples accumulated */
	hrtime_t bs_time;	/* sum of gethrtime() deltas */
	hrtime_t bs_vtime;	/* sum of gethrvtime() deltas */
} backend_spent_t;
64 
/*
 * The statistics kept for one access mode of a backend (see be_totals[] in
 * sqlite_backend_t).
 */
typedef struct backend_totals {
	backend_spent_t	bt_lock;	/* waiting for lock */
	backend_spent_t	bt_exec;	/* time spent executing SQL */
} backend_totals_t;
69 
/*
 * State for one backend database.  be_lock is taken by backend_lock() and
 * released by backend_unlock(); while it is held, be_thread identifies the
 * holder, which lets backend_panic() release locks owned by the panicking
 * thread.
 */
typedef struct sqlite_backend {
	pthread_mutex_t	be_lock;
	pthread_t	be_thread;	/* thread holding lock */
	struct sqlite	*be_db;
	const char	*be_path;	/* path to db */
	int		be_readonly;	/* backend is read-only */
	int		be_writing;	/* held for writing */
	backend_type_t	be_type;	/* type of db */
	/* indexed by !!writing: [0] is reading, [1] is writing */
	backend_totals_t be_totals[2];	/* one for reading, one for writing */
} sqlite_backend_t;
80 
/*
 * Transaction handle.  The code which fills in and consumes this structure
 * is not in this part of the file; the notes on bt_be, bt_readonly and
 * bt_type are inferred from the field names -- TODO confirm.
 */
struct backend_tx {
	sqlite_backend_t	*bt_be;		/* presumably the backend used */
	int			bt_readonly;	/* presumably a read-only tx */
	int			bt_type;	/* presumably a backend type */
	int			bt_full;	/* SQLITE_FULL during tx */
};
87 
/*
 * UPDATE_TOTALS_WR() charges one sample to the be_totals[] slot selected by
 * `writing' (zero selects [0], nonzero selects [1]).  It increments the
 * sample count and adds the time elapsed since the caller recorded `ts'
 * (from gethrtime()) and `vts' (from gethrvtime()).  `field' names the
 * backend_spent_t member of backend_totals_t to charge.
 *
 * The timestamp arguments are parenthesized so that an argument which is
 * itself an expression is subtracted as a whole.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
	__bsp->bs_count++;						\
	__bsp->bs_time += (gethrtime() - (ts));				\
	__bsp->bs_vtime += (gethrvtime() - (vts));			\
}

/*
 * UPDATE_TOTALS() is UPDATE_TOTALS_WR() with the slot chosen by the
 * backend's current be_writing flag.
 */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
97 
/*
 * A query under construction.  Nothing in this part of the file touches
 * these fields; presumably bq_buf is the query text and bq_size its
 * allocated size -- TODO confirm against the backend_query_*() routines.
 */
struct backend_query {
	char	*bq_buf;
	size_t	bq_size;
};
102 
/*
 * One table of the schema: its name and its comma-separated column
 * definitions, as listed in the tbls_*[] arrays.
 */
struct backend_tbl_info {
	const char *bti_name;	/* table name */
	const char *bti_cols;	/* column definitions */
};
107 
/*
 * One index of the schema, as listed in the idxs_*[] arrays.
 */
struct backend_idx_info {
	const char *bxi_tbl;	/* table the index is on */
	const char *bxi_idx;	/* short name of the index */
	const char *bxi_cols;	/* comma-separated indexed columns */
};
113 
/*
 * Panic coordination.  backend_panic_thread is set, under
 * backend_panic_lock, by the first thread to enter backend_panic(); threads
 * which arrive later wait on backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

/*
 * Debugging knobs.  All default to off.
 */
int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */
121 
/*
 * Any change to the schema below must bump this version number.
 * backend_create() rejects a database whose schema_version table holds a
 * different value.
 */
#define	BACKEND_SCHEMA_VERSION		5
126 
/*
 * Tables specific to BACKEND_TYPE_NORMAL databases.  Each entry is a table
 * name followed by its column definitions; the list ends with a
 * { NULL, NULL } entry.
 */
static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
	/*
	 * service_tbl holds all services.  svc_id is the identifier of the
	 * service.
	 */
	{
		"service_tbl",
		"svc_id          INTEGER PRIMARY KEY,"
		"svc_name        CHAR(256) NOT NULL"
	},

	/*
	 * instance_tbl holds all of the instances.  The parent service id
	 * is instance_svc.
	 */
	{
		"instance_tbl",
		"instance_id     INTEGER PRIMARY KEY,"
		"instance_name   CHAR(256) NOT NULL,"
		"instance_svc    INTEGER NOT NULL"
	},

	/*
	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
	 */
	{
		"snapshot_lnk_tbl",
		"lnk_id          INTEGER PRIMARY KEY,"
		"lnk_inst_id     INTEGER NOT NULL,"
		"lnk_snap_name   CHAR(256) NOT NULL,"
		"lnk_snap_id     INTEGER NOT NULL"
	},

	/*
	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
	 * snaplevels.
	 */
	{
		"snaplevel_tbl",
		"snap_id                 INTEGER NOT NULL,"
		"snap_level_num          INTEGER NOT NULL,"
		"snap_level_id           INTEGER NOT NULL,"
		"snap_level_service_id   INTEGER NOT NULL,"
		"snap_level_service      CHAR(256) NOT NULL,"
		"snap_level_instance_id  INTEGER NULL,"
		"snap_level_instance     CHAR(256) NULL"
	},

	/*
	 * snaplevel_lnk_tbl links snaplevels to property groups.
	 * snaplvl_pg_* is identical to the original property group,
	 * and snaplvl_gen_id overrides the generation number.
	 * The service/instance ids are as in the snaplevel.
	 */
	{
		"snaplevel_lnk_tbl",
		"snaplvl_level_id INTEGER NOT NULL,"
		"snaplvl_pg_id    INTEGER NOT NULL,"
		"snaplvl_pg_name  CHAR(256) NOT NULL,"
		"snaplvl_pg_type  CHAR(256) NOT NULL,"
		"snaplvl_pg_flags INTEGER NOT NULL,"
		"snaplvl_gen_id   INTEGER NOT NULL"
	},

	{ NULL, NULL }
};
193 
/*
 * Indexes on the tbls_normal[] tables, as { table, index name, columns }.
 * The list ends with an all-NULL entry.
 */
static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
	{ "service_tbl",	"name",	"svc_name" },
	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
	{ "snaplevel_tbl",	"id",	"snap_id" },
	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
	{ NULL, NULL, NULL }
};
204 
/*
 * BACKEND_TYPE_NONPERSIST has no tables of its own; the list holds only
 * the terminator.
 */
static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL }
};
208 
/*
 * BACKEND_TYPE_NONPERSIST has no indexes of its own; the list holds only
 * the terminator.
 */
static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL, NULL }
};
212 
/*
 * Tables present in every backend type.  Each entry is a table name
 * followed by its column definitions; the list ends with a { NULL, NULL }
 * entry.
 */
static struct backend_tbl_info tbls_common[] = { /* all backend types */
	/*
	 * pg_tbl defines property groups.  They are associated with a single
	 * service or instance.  The pg_gen_id links them with the latest
	 * "edited" version of its properties.
	 */
	{
		"pg_tbl",
		"pg_id           INTEGER PRIMARY KEY,"
		"pg_parent_id    INTEGER NOT NULL,"
		"pg_name         CHAR(256) NOT NULL,"
		"pg_type         CHAR(256) NOT NULL,"
		"pg_flags        INTEGER NOT NULL,"
		"pg_gen_id       INTEGER NOT NULL"
	},

	/*
	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
	 * (prop_name, prop_type, val_id) trios.
	 */
	{
		"prop_lnk_tbl",
		"lnk_prop_id     INTEGER PRIMARY KEY,"
		"lnk_pg_id       INTEGER NOT NULL,"
		"lnk_gen_id      INTEGER NOT NULL,"
		"lnk_prop_name   CHAR(256) NOT NULL,"
		"lnk_prop_type   CHAR(2) NOT NULL,"
		"lnk_val_id      INTEGER"
	},

	/*
	 * value_tbl maps a value_id to a set of values.  For any given
	 * value_id, value_type is constant.
	 */
	{
		"value_tbl",
		"value_id        INTEGER NOT NULL,"
		"value_type      CHAR(1) NOT NULL,"
		"value_value     VARCHAR NOT NULL"
	},

	/*
	 * id_tbl has one row per id space
	 */
	{
		"id_tbl",
		"id_name         STRING NOT NULL,"
		"id_next         INTEGER NOT NULL"
	},

	/*
	 * schema_version has a single row, which contains
	 * BACKEND_SCHEMA_VERSION at the time of creation.
	 */
	{
		"schema_version",
		"schema_version  INTEGER"
	},
	{ NULL, NULL }
};
273 
/*
 * Indexes on the tbls_common[] tables, as { table, index name, columns }.
 * The list ends with an all-NULL entry.
 */
static struct backend_idx_info idxs_common[] = { /* all backend types */
	{ "pg_tbl",		"parent", "pg_parent_id" },
	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
	{ "value_tbl",		"id",	"value_id" },
	{ "id_tbl",		"id",	"id_name" },
	{ NULL, NULL, NULL }
};
284 
/*
 * Argument block for run_single_int_callback().
 */
struct run_single_int_info {
	uint32_t	*rs_out;	/* where the parsed value is stored */
	int		rs_result;	/* set to _SUCCESS once stored */
};
289 
290 /*ARGSUSED*/
291 static int
292 run_single_int_callback(void *arg, int columns, char **vals, char **names)
293 {
294 	struct run_single_int_info *info = arg;
295 	uint32_t val;
296 
297 	char *endptr = vals[0];
298 
299 	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
300 	assert(columns == 1);
301 
302 	if (vals[0] == NULL)
303 		return (BACKEND_CALLBACK_CONTINUE);
304 
305 	errno = 0;
306 	val = strtoul(vals[0], &endptr, 10);
307 	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
308 		backend_panic("malformed integer \"%20s\"", vals[0]);
309 
310 	*info->rs_out = val;
311 	info->rs_result = REP_PROTOCOL_SUCCESS;
312 	return (BACKEND_CALLBACK_CONTINUE);
313 }
314 
315 /*ARGSUSED*/
316 int
317 backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
318 {
319 	return (BACKEND_CALLBACK_ABORT);
320 }
321 
322 static int
323 backend_is_readonly(struct sqlite *db, char **errp)
324 {
325 	int r = sqlite_exec(db,
326 	    "BEGIN TRANSACTION; "
327 	    "UPDATE schema_version SET schema_version = schema_version; ",
328 	    NULL, NULL, errp);
329 
330 	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
331 	return (r);
332 }
333 
334 static void
335 backend_trace_sql(void *arg, const char *sql)
336 {
337 	sqlite_backend_t *be = arg;
338 
339 	if (backend_print_trace) {
340 		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
341 	}
342 }
343 
/*
 * be_info[] is the storage for every backend.  bes[t] stays NULL until
 * backend_create_finish() publishes &be_info[t], so a NULL entry means the
 * backend is not available.
 */
static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];
346 
347 #define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
348 /*
349  * backend_panic() -- some kind of database problem or corruption has been hit.
350  * We attempt to quiesce the other database users -- all of the backend sql
351  * entry points will call backend_panic(NULL) if a panic is in progress, as
352  * will any attempt to start a transaction.
353  *
354  * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
355  * either drop the lock or call backend_panic().  If they don't respond in
356  * time, we'll just exit anyway.
357  */
358 void
359 backend_panic(const char *format, ...)
360 {
361 	int i;
362 	va_list args;
363 	int failed = 0;
364 
365 	(void) pthread_mutex_lock(&backend_panic_lock);
366 	if (backend_panic_thread != 0) {
367 		(void) pthread_mutex_unlock(&backend_panic_lock);
368 		/*
369 		 * first, drop any backend locks we're holding, then
370 		 * sleep forever on the panic_cv.
371 		 */
372 		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
373 			if (bes[i] != NULL &&
374 			    bes[i]->be_thread == pthread_self())
375 				(void) pthread_mutex_unlock(&bes[i]->be_lock);
376 		}
377 		(void) pthread_mutex_lock(&backend_panic_lock);
378 		for (;;)
379 			(void) pthread_cond_wait(&backend_panic_cv,
380 			    &backend_panic_lock);
381 	}
382 	backend_panic_thread = pthread_self();
383 	(void) pthread_mutex_unlock(&backend_panic_lock);
384 
385 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
386 		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
387 			(void) pthread_mutex_unlock(&bes[i]->be_lock);
388 	}
389 
390 	va_start(args, format);
391 	configd_vcritical(format, args);
392 	va_end(args);
393 
394 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
395 		timespec_t rel;
396 
397 		rel.tv_sec = 0;
398 		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
399 
400 		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
401 			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
402 			    &rel) != 0)
403 				failed++;
404 		}
405 	}
406 	if (failed) {
407 		configd_critical("unable to quiesce database\n");
408 	}
409 
410 	if (backend_panic_abort)
411 		abort();
412 
413 	exit(CONFIGD_EXIT_DATABASE_BAD);
414 }
415 
416 /*
417  * Returns
418  *   _SUCCESS
419  *   _DONE - callback aborted query
420  *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
421  */
422 static int
423 backend_error(sqlite_backend_t *be, int error, char *errmsg)
424 {
425 	if (error == SQLITE_OK)
426 		return (REP_PROTOCOL_SUCCESS);
427 
428 	switch (error) {
429 	case SQLITE_ABORT:
430 		free(errmsg);
431 		return (REP_PROTOCOL_DONE);
432 
433 	case SQLITE_NOMEM:
434 	case SQLITE_FULL:
435 	case SQLITE_TOOBIG:
436 		free(errmsg);
437 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
438 
439 	default:
440 		backend_panic("%s: db error: %s", be->be_path, errmsg);
441 		/*NOTREACHED*/
442 	}
443 }
444 
/*
 * Free a list built by backend_backup_get_prev(): each of the first out_sz
 * strings, and then the array itself.  A NULL list with a count of zero or
 * less is a no-op.
 */
static void
backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
{
	char **list = (char **)out_arg;
	ssize_t i;

	for (i = 0; i < out_sz; i++)
		free(list[i]);
	free(list);
}
454 
/*
 * Builds an inverse-time-sorted (newest first) array of backup files.  The
 * path is a single buffer, and the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * dirname will either be pathname, or ".".
 *
 * The buffer is modified in place while we work (it is cut at
 * pathname+pathlen, and briefly at basename), and the overwritten bytes are
 * put back before returning, so pathname must be writable.
 *
 * On success, *out_arg is the array; each element is a separately allocated
 * copy of a matching directory entry name (no directory part).  The result
 * is released with backend_backup_cleanup().
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups (*out_arg is then NULL), or -1 on error (*out_arg is NULL).
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char b_start, b_end;	/* bytes saved while the path is munged */
	DIR *dir;
	char **out = NULL;
	char *name, *p;
	char *dirname, *basename;
	char *pathend;
	struct dirent *ent;

	size_t count = 0;
	size_t baselen;

	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;	/* where the '_' must be */

	size_t idx;

	/* cut the path at pathlen so the timestamp suffix is ignored */
	pathend = pathname + pathlen;
	b_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');

	if (basename != NULL) {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	} else {
		basename = pathname;
		dirname = ".";
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	b_start = basename[0];

	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = b_start;		/* restore path */

	if (dir == NULL)
		goto fail;


	while ((ent = readdir(dir)) != NULL) {
		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0)
			continue;

		name = ent->d_name;
		if (name[baselen] != '-')
			continue;

		p = name + baselen + 1;

		/*
		 * A NUL inside the stamp is not a digit and is not '_', so
		 * this loop stops before reading past the end of the name.
		 */
		for (idx = 0; idx < ndigits; idx++) {
			char c = p[idx];
			if (idx == baroffset && c != '_')
				break;
			if (idx != baroffset && (c < '0' || c > '9'))
				break;
		}
		if (idx != ndigits || p[idx] != '\0')
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		name = strdup(name);
		if (name == NULL)
			goto fail_closedir;
		p = strrchr(name, '-');

		/*
		 * Walk the list with the entry in hand.  Whenever it sorts
		 * after the entry at idx, it takes that slot and the
		 * displaced entry is carried forward instead.
		 */
		for (idx = 0; idx < count; idx++) {
			char *tmp = out[idx];
			char *tp = strrchr(tmp, '-');

			int cmp = strcmp(p, tp);
			if (cmp == 0)
				cmp = strcmp(name, tmp);

			if (cmp == 0) {
				/* already in the list; drop the duplicate */
				free(name);
				name = NULL;
				break;
			} else if (cmp > 0) {
				out[idx] = name;
				name = tmp;
				p = tp;
			}
		}

		if (idx == count) {
			/* whatever is still in hand goes on the end */
			char **new_out = realloc(out,
			    (count + 1) * sizeof (*out));

			if (new_out == NULL) {
				free(name);
				goto fail_closedir;
			}

			out = new_out;
			out[count++] = name;
		} else {
			assert(name == NULL);
		}
	}
	(void) closedir(dir);

	/* basename + baselen is pathend; put the saved byte back */
	basename[baselen] = b_end;

	*out_arg = (const char **)out;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	basename[0] = b_start;
	*pathend = b_end;

	backend_backup_cleanup((const char **)out, count);

	*out_arg = NULL;
	return (-1);
}
605 
606 /*
607  * Copies the repository path into out, a buffer of out_len bytes,
608  * removes the ".db" (or whatever) extension, and, if name is non-NULL,
609  * appends "-name" to it.  If name is non-NULL, it can fail with:
610  *
611  *	_TRUNCATED	will not fit in buffer.
612  *	_BAD_REQUEST	name is not a valid identifier
613  */
614 static rep_protocol_responseid_t
615 backend_backup_base(sqlite_backend_t *be, const char *name,
616     char *out, size_t out_len)
617 {
618 	char *p, *q;
619 	size_t len;
620 
621 	/*
622 	 * for paths of the form /path/to/foo.db, we truncate at the final
623 	 * '.'.
624 	 */
625 	(void) strlcpy(out, be->be_path, out_len);
626 
627 	p = strrchr(out, '/');
628 	q = strrchr(out, '.');
629 
630 	if (p != NULL && q != NULL && q > p)
631 		*q = 0;
632 
633 	if (name != NULL) {
634 		len = strlen(out);
635 		assert(len < out_len);
636 
637 		out += len;
638 		out_len -= len;
639 
640 		len = strlen(name);
641 
642 		/*
643 		 * verify that the name tag is entirely alphabetic,
644 		 * non-empty, and not too long.
645 		 */
646 		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
647 		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
648 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
649 
650 		if (snprintf(out, out_len, "-%s", name) >= out_len)
651 			return (REP_PROTOCOL_FAIL_TRUNCATED);
652 	}
653 
654 	return (REP_PROTOCOL_SUCCESS);
655 }
656 
657 /*
658  * Can return:
659  *	_BAD_REQUEST		name is not valid
660  *	_TRUNCATED		name is too long for current repository path
661  *	_UNKNOWN		failed for unknown reason (details written to
662  *				console)
663  *	_BACKEND_READONLY	backend is not writable
664  *
665  *	_SUCCESS		Backup completed successfully.
666  */
667 static rep_protocol_responseid_t
668 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
669 {
670 	const char **old_list;
671 	ssize_t old_sz;
672 	ssize_t old_max = max_repository_backups;
673 	ssize_t cur;
674 
675 	char *finalname;
676 
677 	char finalpath[PATH_MAX];
678 	char tmppath[PATH_MAX];
679 	char buf[8192];
680 	int infd, outfd;
681 	size_t len;
682 	off_t inlen, outlen, offset;
683 
684 	time_t now;
685 	struct tm now_tm;
686 
687 	rep_protocol_responseid_t result;
688 
689 	if (be->be_readonly)
690 		return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
691 
692 	result = backend_backup_base(be, name, finalpath, sizeof (finalpath));
693 	if (result != REP_PROTOCOL_SUCCESS)
694 		return (result);
695 
696 	/*
697 	 * remember the original length, and the basename location
698 	 */
699 	len = strlen(finalpath);
700 	finalname = strrchr(finalpath, '/');
701 	if (finalname != NULL)
702 		finalname++;
703 	else
704 		finalname = finalpath;
705 
706 	(void) strlcpy(tmppath, finalpath, sizeof (tmppath));
707 	if (strlcat(tmppath, "-tmpXXXXXX", sizeof (tmppath)) >=
708 	    sizeof (tmppath))
709 		return (REP_PROTOCOL_FAIL_TRUNCATED);
710 
711 	now = time(NULL);
712 	if (localtime_r(&now, &now_tm) == NULL) {
713 		configd_critical(
714 		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
715 		    be->be_path, strerror(errno));
716 		return (REP_PROTOCOL_FAIL_UNKNOWN);
717 	}
718 
719 	if (strftime(finalpath + len, sizeof (finalpath) - len,
720 	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >=
721 	    sizeof (finalpath) - len) {
722 		return (REP_PROTOCOL_FAIL_TRUNCATED);
723 	}
724 
725 	infd = open(be->be_path, O_RDONLY);
726 	if (infd < 0) {
727 		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
728 		    be->be_path, strerror(errno));
729 		return (REP_PROTOCOL_FAIL_UNKNOWN);
730 	}
731 
732 	outfd = mkstemp(tmppath);
733 	if (outfd < 0) {
734 		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
735 		    name, tmppath, strerror(errno));
736 		(void) close(infd);
737 		return (REP_PROTOCOL_FAIL_UNKNOWN);
738 	}
739 
740 	for (;;) {
741 		do {
742 			inlen = read(infd, buf, sizeof (buf));
743 		} while (inlen < 0 && errno == EINTR);
744 
745 		if (inlen <= 0)
746 			break;
747 
748 		for (offset = 0; offset < inlen; offset += outlen) {
749 			do {
750 				outlen = write(outfd, buf + offset,
751 				    inlen - offset);
752 			} while (outlen < 0 && errno == EINTR);
753 
754 			if (outlen >= 0)
755 				continue;
756 
757 			configd_critical(
758 			    "\"%s\" backup failed: write to %s: %s\n",
759 			    name, tmppath, strerror(errno));
760 			result = REP_PROTOCOL_FAIL_UNKNOWN;
761 			goto fail;
762 		}
763 	}
764 
765 	if (inlen < 0) {
766 		configd_critical(
767 		    "\"%s\" backup failed: read from %s: %s\n",
768 		    name, be->be_path, strerror(errno));
769 		goto fail;
770 	}
771 
772 	/*
773 	 * grab the old list before doing our re-name.
774 	 */
775 	if (old_max > 0)
776 		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
777 
778 	if (rename(tmppath, finalpath) < 0) {
779 		configd_critical(
780 		    "\"%s\" backup failed: rename(%s, %s): %s\n",
781 		    name, tmppath, finalpath, strerror(errno));
782 		result = REP_PROTOCOL_FAIL_UNKNOWN;
783 		goto fail;
784 	}
785 
786 	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
787 
788 	(void) unlink(tmppath);
789 	if (symlink(finalname, tmppath) < 0) {
790 		configd_critical(
791 		    "\"%s\" backup completed, but updating "
792 		    "\"%s\" symlink to \"%s\" failed: %s\n",
793 		    name, tmppath, finalname, strerror(errno));
794 	}
795 
796 	if (old_max > 0 && old_sz > 0) {
797 		/* unlink all but the first (old_max - 1) files */
798 		for (cur = old_max - 1; cur < old_sz; cur++) {
799 			(void) strlcpy(finalname, old_list[cur],
800 			    sizeof (finalpath) - (finalname - finalpath));
801 			if (unlink(finalpath) < 0)
802 				configd_critical(
803 				    "\"%s\" backup completed, but removing old "
804 				    "file \"%s\" failed: %s\n",
805 				    name, finalpath, strerror(errno));
806 		}
807 
808 		backend_backup_cleanup(old_list, old_sz);
809 	}
810 
811 	result = REP_PROTOCOL_SUCCESS;
812 
813 fail:
814 	(void) close(infd);
815 	(void) close(outfd);
816 	if (result != REP_PROTOCOL_SUCCESS)
817 		(void) unlink(tmppath);
818 
819 	return (result);
820 }
821 
822 
/*
 * Acquire the lock of backend t and return the backend through *bep.
 * writing says whether the caller intends to modify the database; it
 * selects which statistics slot is charged for the wait and is recorded in
 * be_writing.  On any failure *bep is NULL and the lock is not held.
 *
 * If a writer asks for a backend which is currently marked read-only, the
 * database is reopened and probed; if it has become writable, the new
 * handle replaces the old one and a REPOSITORY_BOOT_BACKUP backup is
 * attempted (its failure is only logged).
 *
 * If t is not BACKEND_TYPE_NORMAL, can fail with
 *   _BACKEND_ACCESS - backend does not exist
 *
 * If writing is nonzero, can also fail with
 *   _BACKEND_READONLY - backend is read-only
 */
static int
backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
{
	sqlite_backend_t *be = NULL;
	hrtime_t ts, vts;

	*bep = NULL;

	assert(t == BACKEND_TYPE_NORMAL ||
	    t == BACKEND_TYPE_NONPERSIST);

	be = bes[t];
	if (t == BACKEND_TYPE_NORMAL)
		assert(be != NULL);		/* should always be there */

	if (be == NULL)
		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);

	if (backend_panic_thread != 0)
		backend_panic(NULL);		/* don't proceed */

	/* time how long we wait for the lock */
	ts = gethrtime();
	vts = gethrvtime();
	(void) pthread_mutex_lock(&be->be_lock);
	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);

	/* a panic may have begun while we were waiting */
	if (backend_panic_thread != 0) {
		(void) pthread_mutex_unlock(&be->be_lock);
		backend_panic(NULL);		/* don't proceed */
	}
	be->be_thread = pthread_self();

	if (writing && be->be_readonly) {
		char *errp;
		struct sqlite *new;
		int r;

		assert(t == BACKEND_TYPE_NORMAL);

		new = sqlite_open(be->be_path, 0600, &errp);
		if (new == NULL) {
			backend_panic("reopening %s: %s\n", be->be_path, errp);
			/*NOTREACHED*/
		}
		r = backend_is_readonly(new, &errp);
		if (r != SQLITE_OK) {
			/* still not writable; keep using the old handle */
			free(errp);
			sqlite_close(new);
			be->be_thread = 0;
			(void) pthread_mutex_unlock(&be->be_lock);
			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
		}

		/*
		 * We can write!  Swap our db handles, mark ourself writable,
		 * and make a backup.
		 */
		sqlite_close(be->be_db);
		be->be_db = new;
		be->be_readonly = 0;

		if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
		    REP_PROTOCOL_SUCCESS) {
			configd_critical(
			    "unable to create \"%s\" backup of \"%s\"\n",
			    REPOSITORY_BOOT_BACKUP, be->be_path);
		}
	}

	/* install or clear the trace callback on every acquisition */
	if (backend_do_trace)
		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
	else
		(void) sqlite_trace(be->be_db, NULL, NULL);

	be->be_writing = writing;
	*bep = be;
	return (REP_PROTOCOL_SUCCESS);
}
908 
909 static void
910 backend_unlock(sqlite_backend_t *be)
911 {
912 	be->be_writing = 0;
913 	be->be_thread = 0;
914 	(void) pthread_mutex_unlock(&be->be_lock);
915 }
916 
917 static void
918 backend_destroy(sqlite_backend_t *be)
919 {
920 	if (be->be_db != NULL) {
921 		sqlite_close(be->be_db);
922 		be->be_db = NULL;
923 	}
924 	be->be_thread = 0;
925 	(void) pthread_mutex_unlock(&be->be_lock);
926 	(void) pthread_mutex_destroy(&be->be_lock);
927 }
928 
929 static void
930 backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
931 {
932 	assert(MUTEX_HELD(&be->be_lock));
933 	assert(be == &be_info[backend_id]);
934 
935 	bes[backend_id] = be;
936 	(void) pthread_mutex_unlock(&be->be_lock);
937 }
938 
/*
 * Write all of the NUL-terminated string mess to fd, continuing after
 * short writes and restarting writes interrupted by a signal (EINTR).
 *
 * Returns 0 once everything has been written (including when mess is
 * empty), or -1 if write() fails for any other reason; errno is left as
 * set by write().
 */
static int
backend_fd_write(int fd, const char *mess)
{
	size_t left = strlen(mess);
	ssize_t written;

	while (left > 0) {
		written = write(fd, mess, left);
		if (written < 0) {
			if (errno == EINTR)
				continue;
			return (-1);
		}
		mess += written;
		left -= written;
	}
	return (0);
}
953 
954 /*
955  * Can return:
956  *	_BAD_REQUEST		name is not valid
957  *	_TRUNCATED		name is too long for current repository path
958  *	_UNKNOWN		failed for unknown reason (details written to
959  *				console)
960  *	_BACKEND_READONLY	backend is not writable
961  *
962  *	_SUCCESS		Backup completed successfully.
963  */
964 rep_protocol_responseid_t
965 backend_create_backup(const char *name)
966 {
967 	rep_protocol_responseid_t result;
968 	sqlite_backend_t *be;
969 
970 	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
971 	if (result != REP_PROTOCOL_SUCCESS)
972 		return (result);
973 
974 	result = backend_create_backup_locked(be, name);
975 	backend_unlock(be);
976 
977 	return (result);
978 }
979 
980 /*ARGSUSED*/
981 static int
982 backend_integrity_callback(void *private, int narg, char **vals, char **cols)
983 {
984 	char **out = private;
985 	char *old = *out;
986 	char *new;
987 	const char *info;
988 	size_t len;
989 	int x;
990 
991 	for (x = 0; x < narg; x++) {
992 		if ((info = vals[x]) != NULL &&
993 		    strcmp(info, "ok") != 0) {
994 			len = (old == NULL)? 0 : strlen(old);
995 			len += strlen(info) + 2;	/* '\n' + '\0' */
996 
997 			new = realloc(old, len);
998 			if (new == NULL)
999 				return (BACKEND_CALLBACK_ABORT);
1000 			if (old == NULL)
1001 				new[0] = 0;
1002 			old = *out = new;
1003 			(void) strlcat(new, info, len);
1004 			(void) strlcat(new, "\n", len);
1005 		}
1006 	}
1007 	return (BACKEND_CALLBACK_CONTINUE);
1008 }
1009 
/*
 * Results of backend_create():
 *
 *	_LOCKED		sqlite reported SQLITE_BUSY or SQLITE_LOCKED; the
 *			backend has been destroyed and *bep is NULL.
 *	_FAIL		presumably the general failure result -- the code
 *			which returns it is not visible here; TODO confirm.
 *	_SUCCESS	presumably a usable, writable backend -- TODO confirm.
 *	_READONLY	the database is usable but the write probe failed;
 *			be_readonly has been set.
 *	_NEED_INIT	neither schema_version nor id_tbl exists, so the
 *			database has no schema yet.
 */
#define	BACKEND_CREATE_LOCKED		-2
#define	BACKEND_CREATE_FAIL		-1
#define	BACKEND_CREATE_SUCCESS		0
#define	BACKEND_CREATE_READONLY		1
#define	BACKEND_CREATE_NEED_INIT	2
1015 static int
1016 backend_create(backend_type_t backend_id, const char *db_file,
1017     sqlite_backend_t **bep)
1018 {
1019 	char *errp;
1020 	char *integrity_results = NULL;
1021 	sqlite_backend_t *be;
1022 	int r;
1023 	uint32_t val = -1UL;
1024 	struct run_single_int_info info;
1025 	int fd;
1026 
1027 	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);
1028 
1029 	be = &be_info[backend_id];
1030 	assert(be->be_db == NULL);
1031 
1032 	(void) pthread_mutex_init(&be->be_lock, NULL);
1033 	(void) pthread_mutex_lock(&be->be_lock);
1034 
1035 	be->be_type = backend_id;
1036 	be->be_path = strdup(db_file);
1037 	if (be->be_path == NULL) {
1038 		perror("malloc");
1039 		goto fail;
1040 	}
1041 
1042 	be->be_db = sqlite_open(be->be_path, 0600, &errp);
1043 
1044 	if (be->be_db == NULL) {
1045 		if (strstr(errp, "out of memory") != NULL) {
1046 			configd_critical("%s: %s\n", db_file, errp);
1047 			free(errp);
1048 
1049 			goto fail;
1050 		}
1051 
1052 		/* report it as an integrity failure */
1053 		integrity_results = errp;
1054 		errp = NULL;
1055 		goto integrity_fail;
1056 	}
1057 
1058 	/*
1059 	 * check if we are inited and of the correct schema version
1060 	 *
1061 	 * Eventually, we'll support schema upgrade here.
1062 	 */
1063 	info.rs_out = &val;
1064 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1065 
1066 	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
1067 	    run_single_int_callback, &info, &errp);
1068 	if (r == SQLITE_ERROR &&
1069 	    strcmp("no such table: schema_version", errp) == 0) {
1070 		free(errp);
1071 		/*
1072 		 * Could be an empty repository, could be pre-schema_version
1073 		 * schema.  Check for id_tbl, which has always been there.
1074 		 */
1075 		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
1076 		    NULL, NULL, &errp);
1077 		if (r == SQLITE_ERROR &&
1078 		    strcmp("no such table: id_tbl", errp) == 0) {
1079 			free(errp);
1080 			*bep = be;
1081 			return (BACKEND_CREATE_NEED_INIT);
1082 		}
1083 
1084 		configd_critical("%s: schema version mismatch\n", db_file);
1085 		goto fail;
1086 	}
1087 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1088 		free(errp);
1089 		*bep = NULL;
1090 		backend_destroy(be);
1091 		return (BACKEND_CREATE_LOCKED);
1092 	}
1093 	if (r == SQLITE_OK) {
1094 		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
1095 		    val != BACKEND_SCHEMA_VERSION) {
1096 			configd_critical("%s: schema version mismatch\n",
1097 			    db_file);
1098 			goto fail;
1099 		}
1100 	}
1101 
1102 	/*
1103 	 * pull in the whole database sequentially.
1104 	 */
1105 	if ((fd = open(db_file, O_RDONLY)) >= 0) {
1106 		size_t sz = 64 * 1024;
1107 		char *buffer = malloc(sz);
1108 		if (buffer != NULL) {
1109 			while (read(fd, buffer, sz) > 0)
1110 				;
1111 			free(buffer);
1112 		}
1113 		(void) close(fd);
1114 	}
1115 
1116 	/*
1117 	 * run an integrity check
1118 	 */
1119 	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
1120 	    backend_integrity_callback, &integrity_results, &errp);
1121 
1122 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1123 		free(errp);
1124 		*bep = NULL;
1125 		backend_destroy(be);
1126 		return (BACKEND_CREATE_LOCKED);
1127 	}
1128 	if (r == SQLITE_ABORT) {
1129 		free(errp);
1130 		errp = NULL;
1131 		integrity_results = "out of memory running integrity check\n";
1132 	} else if (r != SQLITE_OK && integrity_results == NULL) {
1133 		integrity_results = errp;
1134 		errp = NULL;
1135 	}
1136 
1137 integrity_fail:
1138 	if (integrity_results != NULL) {
1139 		const char *fname = "/etc/svc/volatile/db_errors";
1140 		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
1141 			fname = NULL;
1142 		} else {
1143 			if (backend_fd_write(fd, "\n\n") < 0 ||
1144 			    backend_fd_write(fd, db_file) < 0 ||
1145 			    backend_fd_write(fd,
1146 			    ": PRAGMA integrity_check; failed.  Results:\n") <
1147 			    0 || backend_fd_write(fd, integrity_results) < 0 ||
1148 			    backend_fd_write(fd, "\n\n") < 0) {
1149 				fname = NULL;
1150 			}
1151 			(void) close(fd);
1152 		}
1153 
1154 		if (!is_main_repository ||
1155 		    backend_id == BACKEND_TYPE_NONPERSIST) {
1156 			if (fname != NULL)
1157 				configd_critical(
1158 				    "%s: integrity check failed. Details in "
1159 				    "%s\n", db_file, fname);
1160 			else
1161 				configd_critical(
1162 				    "%s: integrity check failed: %s\n",
1163 				    db_file);
1164 		} else {
1165 			(void) fprintf(stderr,
1166 "\n"
1167 "svc.configd: smf(5) database integrity check of:\n"
1168 "\n"
1169 "    %s\n"
1170 "\n"
1171 "  failed. The database might be damaged or a media error might have\n"
1172 "  prevented it from being verified.  Additional information useful to\n"
1173 "  your service provider%s%s\n"
1174 "\n"
1175 "  The system will not be able to boot until you have restored a working\n"
1176 "  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
1177 "  purposes.  The command:\n"
1178 "\n"
1179 "    /lib/svc/bin/restore_repository\n"
1180 "\n"
1181 "  can be run to restore a backup version of your repository.  See\n"
1182 "  http://sun.com/msg/SMF-8000-MY for more information.\n"
1183 "\n",
1184 			db_file,
1185 			(fname == NULL)? ":\n\n" : " is in:\n\n    ",
1186 			(fname == NULL)? integrity_results : fname);
1187 		}
1188 		free(errp);
1189 		goto fail;
1190 	}
1191 
1192 	/*
1193 	 * check if we are writable
1194 	 */
1195 	r = backend_is_readonly(be->be_db, &errp);
1196 
1197 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1198 		free(errp);
1199 		*bep = NULL;
1200 		backend_destroy(be);
1201 		return (BACKEND_CREATE_LOCKED);
1202 	}
1203 	if (r != SQLITE_OK && r != SQLITE_FULL) {
1204 		free(errp);
1205 		be->be_readonly = 1;
1206 		*bep = be;
1207 		return (BACKEND_CREATE_READONLY);
1208 	}
1209 	*bep = be;
1210 	return (BACKEND_CREATE_SUCCESS);
1211 
1212 fail:
1213 	*bep = NULL;
1214 	backend_destroy(be);
1215 	return (BACKEND_CREATE_FAIL);
1216 }
1217 
/*
 * Round arg up to the nearest power of two; a value that is already a
 * power of two is returned unchanged.
 *
 * In twos-complement arithmetic (arg & -arg) isolates the lowest set bit
 * of arg.  Adding that bit back into arg carries it upward, so repeating
 * the step until only one bit remains yields the result.
 */
static size_t
round_up_to_p2(size_t arg)
{
	size_t lowbit;

	/*
	 * Reject zero and anything with the top bit set, so that the
	 * result can never be (or wrap around to) zero.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		lowbit = arg & -arg;
		if (lowbit == arg)
			break;		/* a single bit is left: done */
		arg += lowbit;
	}

	return (arg);
}
1235 
1236 /*
1237  * Returns
1238  *   _NO_RESOURCES - out of memory
1239  *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
1240  *   _DONE - callback aborted query
1241  *   _SUCCESS
1242  */
1243 int
1244 backend_run(backend_type_t t, backend_query_t *q,
1245     backend_run_callback_f *cb, void *data)
1246 {
1247 	char *errmsg = NULL;
1248 	int ret;
1249 	sqlite_backend_t *be;
1250 	hrtime_t ts, vts;
1251 
1252 	if (q == NULL || q->bq_buf == NULL)
1253 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1254 
1255 	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
1256 		return (ret);
1257 
1258 	ts = gethrtime();
1259 	vts = gethrvtime();
1260 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1261 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1262 	ret = backend_error(be, ret, errmsg);
1263 	backend_unlock(be);
1264 
1265 	return (ret);
1266 }
1267 
1268 /*
1269  * Starts a "read-only" transaction -- i.e., locks out writers as long
1270  * as it is active.
1271  *
1272  * Fails with
1273  *   _NO_RESOURCES - out of memory
1274  *
1275  * If t is not _NORMAL, can also fail with
1276  *   _BACKEND_ACCESS - backend does not exist
1277  *
1278  * If writable is true, can also fail with
1279  *   _BACKEND_READONLY
1280  */
1281 static int
1282 backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
1283 {
1284 	backend_tx_t *ret;
1285 	sqlite_backend_t *be;
1286 	int r;
1287 
1288 	*txp = NULL;
1289 
1290 	ret = uu_zalloc(sizeof (*ret));
1291 	if (ret == NULL)
1292 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1293 
1294 	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
1295 		uu_free(ret);
1296 		return (r);
1297 	}
1298 
1299 	ret->bt_be = be;
1300 	ret->bt_readonly = !writable;
1301 	ret->bt_type = t;
1302 	ret->bt_full = 0;
1303 
1304 	*txp = ret;
1305 	return (REP_PROTOCOL_SUCCESS);
1306 }
1307 
1308 int
1309 backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
1310 {
1311 	return (backend_tx_begin_common(t, txp, 0));
1312 }
1313 
1314 static void
1315 backend_tx_end(backend_tx_t *tx)
1316 {
1317 	sqlite_backend_t *be;
1318 
1319 	be = tx->bt_be;
1320 
1321 	if (tx->bt_full) {
1322 		struct sqlite *new;
1323 
1324 		/*
1325 		 * sqlite tends to be sticky with SQLITE_FULL, so we try
1326 		 * to get a fresh database handle if we got a FULL warning
1327 		 * along the way.  If that fails, no harm done.
1328 		 */
1329 		new = sqlite_open(be->be_path, 0600, NULL);
1330 		if (new != NULL) {
1331 			sqlite_close(be->be_db);
1332 			be->be_db = new;
1333 		}
1334 	}
1335 	backend_unlock(be);
1336 	tx->bt_be = NULL;
1337 	uu_free(tx);
1338 }
1339 
1340 void
1341 backend_tx_end_ro(backend_tx_t *tx)
1342 {
1343 	assert(tx->bt_readonly);
1344 	backend_tx_end(tx);
1345 }
1346 
1347 /*
1348  * Fails with
1349  *   _NO_RESOURCES - out of memory
1350  *   _BACKEND_ACCESS
1351  *   _BACKEND_READONLY
1352  */
1353 int
1354 backend_tx_begin(backend_type_t t, backend_tx_t **txp)
1355 {
1356 	int r;
1357 	char *errmsg;
1358 	hrtime_t ts, vts;
1359 
1360 	r = backend_tx_begin_common(t, txp, 1);
1361 	if (r != REP_PROTOCOL_SUCCESS)
1362 		return (r);
1363 
1364 	ts = gethrtime();
1365 	vts = gethrvtime();
1366 	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
1367 	    &errmsg);
1368 	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
1369 	if (r == SQLITE_FULL)
1370 		(*txp)->bt_full = 1;
1371 	r = backend_error((*txp)->bt_be, r, errmsg);
1372 
1373 	if (r != REP_PROTOCOL_SUCCESS) {
1374 		assert(r != REP_PROTOCOL_DONE);
1375 		(void) sqlite_exec((*txp)->bt_be->be_db,
1376 		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
1377 		backend_tx_end(*txp);
1378 		*txp = NULL;
1379 		return (r);
1380 	}
1381 
1382 	(*txp)->bt_readonly = 0;
1383 
1384 	return (REP_PROTOCOL_SUCCESS);
1385 }
1386 
1387 void
1388 backend_tx_rollback(backend_tx_t *tx)
1389 {
1390 	int r;
1391 	char *errmsg;
1392 	sqlite_backend_t *be;
1393 	hrtime_t ts, vts;
1394 
1395 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1396 	be = tx->bt_be;
1397 
1398 	ts = gethrtime();
1399 	vts = gethrvtime();
1400 	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1401 	    &errmsg);
1402 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1403 	if (r == SQLITE_FULL)
1404 		tx->bt_full = 1;
1405 	(void) backend_error(be, r, errmsg);
1406 
1407 	backend_tx_end(tx);
1408 }
1409 
1410 /*
1411  * Fails with
1412  *   _NO_RESOURCES - out of memory
1413  */
1414 int
1415 backend_tx_commit(backend_tx_t *tx)
1416 {
1417 	int r, r2;
1418 	char *errmsg;
1419 	sqlite_backend_t *be;
1420 	hrtime_t ts, vts;
1421 
1422 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1423 	be = tx->bt_be;
1424 	ts = gethrtime();
1425 	vts = gethrvtime();
1426 	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
1427 	    &errmsg);
1428 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1429 	if (r == SQLITE_FULL)
1430 		tx->bt_full = 1;
1431 
1432 	r = backend_error(be, r, errmsg);
1433 	assert(r != REP_PROTOCOL_DONE);
1434 
1435 	if (r != REP_PROTOCOL_SUCCESS) {
1436 		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1437 		    &errmsg);
1438 		r2 = backend_error(be, r2, errmsg);
1439 		if (r2 != REP_PROTOCOL_SUCCESS)
1440 			backend_panic("cannot rollback failed commit");
1441 
1442 		backend_tx_end(tx);
1443 		return (r);
1444 	}
1445 	backend_tx_end(tx);
1446 	return (REP_PROTOCOL_SUCCESS);
1447 }
1448 
1449 static const char *
1450 id_space_to_name(enum id_space id)
1451 {
1452 	switch (id) {
1453 	case BACKEND_ID_SERVICE_INSTANCE:
1454 		return ("SI");
1455 	case BACKEND_ID_PROPERTYGRP:
1456 		return ("PG");
1457 	case BACKEND_ID_GENERATION:
1458 		return ("GEN");
1459 	case BACKEND_ID_PROPERTY:
1460 		return ("PROP");
1461 	case BACKEND_ID_VALUE:
1462 		return ("VAL");
1463 	case BACKEND_ID_SNAPNAME:
1464 		return ("SNAME");
1465 	case BACKEND_ID_SNAPSHOT:
1466 		return ("SHOT");
1467 	case BACKEND_ID_SNAPLEVEL:
1468 		return ("SLVL");
1469 	default:
1470 		abort();
1471 		/*NOTREACHED*/
1472 	}
1473 }
1474 
1475 /*
1476  * Returns a new id or 0 if the id argument is invalid or the query fails.
1477  */
1478 uint32_t
1479 backend_new_id(backend_tx_t *tx, enum id_space id)
1480 {
1481 	struct run_single_int_info info;
1482 	uint32_t new_id = 0;
1483 	const char *name = id_space_to_name(id);
1484 	char *errmsg;
1485 	int ret;
1486 	sqlite_backend_t *be;
1487 	hrtime_t ts, vts;
1488 
1489 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1490 	be = tx->bt_be;
1491 
1492 	info.rs_out = &new_id;
1493 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1494 
1495 	ts = gethrtime();
1496 	vts = gethrvtime();
1497 	ret = sqlite_exec_printf(be->be_db,
1498 	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
1499 	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
1500 	    run_single_int_callback, &info, &errmsg, name, name);
1501 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1502 	if (ret == SQLITE_FULL)
1503 		tx->bt_full = 1;
1504 
1505 	ret = backend_error(be, ret, errmsg);
1506 
1507 	if (ret != REP_PROTOCOL_SUCCESS) {
1508 		return (0);
1509 	}
1510 
1511 	return (new_id);
1512 }
1513 
1514 /*
1515  * Returns
1516  *   _NO_RESOURCES - out of memory
1517  *   _DONE - callback aborted query
1518  *   _SUCCESS
1519  */
1520 int
1521 backend_tx_run(backend_tx_t *tx, backend_query_t *q,
1522     backend_run_callback_f *cb, void *data)
1523 {
1524 	char *errmsg = NULL;
1525 	int ret;
1526 	sqlite_backend_t *be;
1527 	hrtime_t ts, vts;
1528 
1529 	assert(tx != NULL && tx->bt_be != NULL);
1530 	be = tx->bt_be;
1531 
1532 	if (q == NULL || q->bq_buf == NULL)
1533 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1534 
1535 	ts = gethrtime();
1536 	vts = gethrvtime();
1537 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1538 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1539 	if (ret == SQLITE_FULL)
1540 		tx->bt_full = 1;
1541 	ret = backend_error(be, ret, errmsg);
1542 
1543 	return (ret);
1544 }
1545 
1546 /*
1547  * Returns
1548  *   _NO_RESOURCES - out of memory
1549  *   _NOT_FOUND - the query returned no results
1550  *   _SUCCESS - the query returned a single integer
1551  */
1552 int
1553 backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
1554 {
1555 	struct run_single_int_info info;
1556 	int ret;
1557 
1558 	info.rs_out = buf;
1559 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1560 
1561 	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
1562 	assert(ret != REP_PROTOCOL_DONE);
1563 
1564 	if (ret != REP_PROTOCOL_SUCCESS)
1565 		return (ret);
1566 
1567 	return (info.rs_result);
1568 }
1569 
1570 /*
1571  * Fails with
1572  *   _NO_RESOURCES - out of memory
1573  */
1574 int
1575 backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
1576 {
1577 	va_list a;
1578 	char *errmsg;
1579 	int ret;
1580 	sqlite_backend_t *be;
1581 	hrtime_t ts, vts;
1582 
1583 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1584 	be = tx->bt_be;
1585 
1586 	va_start(a, format);
1587 	ts = gethrtime();
1588 	vts = gethrvtime();
1589 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
1590 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1591 	if (ret == SQLITE_FULL)
1592 		tx->bt_full = 1;
1593 	va_end(a);
1594 	ret = backend_error(be, ret, errmsg);
1595 	assert(ret != REP_PROTOCOL_DONE);
1596 
1597 	return (ret);
1598 }
1599 
1600 /*
1601  * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
1602  */
1603 int
1604 backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
1605 {
1606 	va_list a;
1607 	char *errmsg;
1608 	int ret;
1609 	sqlite_backend_t *be;
1610 	hrtime_t ts, vts;
1611 
1612 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1613 	be = tx->bt_be;
1614 
1615 	va_start(a, format);
1616 	ts = gethrtime();
1617 	vts = gethrvtime();
1618 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
1619 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1620 	if (ret == SQLITE_FULL)
1621 		tx->bt_full = 1;
1622 	va_end(a);
1623 
1624 	ret = backend_error(be, ret, errmsg);
1625 
1626 	return (ret);
1627 }
1628 
1629 #define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
1630 	(backend_add_schema((be), (file), \
1631 	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
1632 	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
1633 
1634 static int
1635 backend_add_schema(sqlite_backend_t *be, const char *file,
1636     struct backend_tbl_info *tbls, int tbl_count,
1637     struct backend_idx_info *idxs, int idx_count)
1638 {
1639 	int i;
1640 	char *errmsg;
1641 	int ret;
1642 
1643 	/*
1644 	 * Create the tables.
1645 	 */
1646 	for (i = 0; i < tbl_count; i++) {
1647 		if (tbls[i].bti_name == NULL) {
1648 			assert(i + 1 == tbl_count);
1649 			break;
1650 		}
1651 		ret = sqlite_exec_printf(be->be_db,
1652 		    "CREATE TABLE %s (%s);\n",
1653 		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
1654 
1655 		if (ret != SQLITE_OK) {
1656 			configd_critical(
1657 			    "%s: %s table creation fails: %s\n", file,
1658 			    tbls[i].bti_name, errmsg);
1659 			free(errmsg);
1660 			return (-1);
1661 		}
1662 	}
1663 
1664 	/*
1665 	 * Make indices on key tables and columns.
1666 	 */
1667 	for (i = 0; i < idx_count; i++) {
1668 		if (idxs[i].bxi_tbl == NULL) {
1669 			assert(i + 1 == idx_count);
1670 			break;
1671 		}
1672 
1673 		ret = sqlite_exec_printf(be->be_db,
1674 		    "CREATE INDEX %s_%s ON %s (%s);\n",
1675 		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
1676 		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
1677 
1678 		if (ret != SQLITE_OK) {
1679 			configd_critical(
1680 			    "%s: %s_%s index creation fails: %s\n", file,
1681 			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
1682 			free(errmsg);
1683 			return (-1);
1684 		}
1685 	}
1686 	return (0);
1687 }
1688 
1689 static int
1690 backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
1691 {
1692 	int i;
1693 	char *errmsg;
1694 	int ret;
1695 
1696 	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
1697 
1698 	if (t == BACKEND_TYPE_NORMAL) {
1699 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
1700 	} else if (t == BACKEND_TYPE_NONPERSIST) {
1701 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
1702 	} else {
1703 		abort();		/* can't happen */
1704 	}
1705 
1706 	if (ret < 0) {
1707 		return (ret);
1708 	}
1709 
1710 	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
1711 	if (ret < 0) {
1712 		return (ret);
1713 	}
1714 
1715 	/*
1716 	 * Add the schema version to the table
1717 	 */
1718 	ret = sqlite_exec_printf(be->be_db,
1719 	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
1720 	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
1721 	if (ret != SQLITE_OK) {
1722 		configd_critical(
1723 		    "setting schema version fails: %s\n", errmsg);
1724 		free(errmsg);
1725 	}
1726 
1727 	/*
1728 	 * Populate id_tbl with initial IDs.
1729 	 */
1730 	for (i = 0; i < BACKEND_ID_INVALID; i++) {
1731 		const char *name = id_space_to_name(i);
1732 
1733 		ret = sqlite_exec_printf(be->be_db,
1734 		    "INSERT INTO id_tbl (id_name, id_next) "
1735 		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
1736 		if (ret != SQLITE_OK) {
1737 			configd_critical(
1738 			    "id insertion for %s fails: %s\n", name, errmsg);
1739 			free(errmsg);
1740 			return (-1);
1741 		}
1742 	}
1743 	/*
1744 	 * Set the persistance of the database.  The normal database is marked
1745 	 * "synchronous", so that all writes are synchronized to stable storage
1746 	 * before proceeding.
1747 	 */
1748 	ret = sqlite_exec_printf(be->be_db,
1749 	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
1750 	    NULL, NULL, &errmsg,
1751 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
1752 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
1753 	if (ret != SQLITE_OK) {
1754 		configd_critical("pragma setting fails: %s\n", errmsg);
1755 		free(errmsg);
1756 		return (-1);
1757 	}
1758 
1759 	return (0);
1760 }
1761 
1762 int
1763 backend_init(const char *db_file, const char *npdb_file, int have_np)
1764 {
1765 	sqlite_backend_t *be;
1766 	int r;
1767 	int writable_persist = 1;
1768 
1769 	/* set up our temporary directory */
1770 	sqlite_temp_directory = "/etc/svc/volatile";
1771 
1772 	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
1773 		configd_critical("Mismatched link!  (%s should be %s)\n",
1774 		    sqlite_version, SQLITE_VERSION);
1775 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
1776 	}
1777 	if (db_file == NULL)
1778 		db_file = REPOSITORY_DB;
1779 
1780 	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
1781 	switch (r) {
1782 	case BACKEND_CREATE_FAIL:
1783 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
1784 	case BACKEND_CREATE_LOCKED:
1785 		return (CONFIGD_EXIT_DATABASE_LOCKED);
1786 	case BACKEND_CREATE_SUCCESS:
1787 		break;		/* success */
1788 	case BACKEND_CREATE_READONLY:
1789 		writable_persist = 0;
1790 		break;
1791 	case BACKEND_CREATE_NEED_INIT:
1792 		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
1793 			backend_destroy(be);
1794 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
1795 		}
1796 		break;
1797 	default:
1798 		abort();
1799 		/*NOTREACHED*/
1800 	}
1801 	backend_create_finish(BACKEND_TYPE_NORMAL, be);
1802 
1803 	if (have_np) {
1804 		if (npdb_file == NULL)
1805 			npdb_file = NONPERSIST_DB;
1806 
1807 		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
1808 		switch (r) {
1809 		case BACKEND_CREATE_SUCCESS:
1810 			break;		/* success */
1811 		case BACKEND_CREATE_FAIL:
1812 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
1813 		case BACKEND_CREATE_LOCKED:
1814 			return (CONFIGD_EXIT_DATABASE_LOCKED);
1815 		case BACKEND_CREATE_READONLY:
1816 			configd_critical("%s: unable to write\n", npdb_file);
1817 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
1818 		case BACKEND_CREATE_NEED_INIT:
1819 			if (backend_init_schema(be, db_file,
1820 			    BACKEND_TYPE_NONPERSIST)) {
1821 				backend_destroy(be);
1822 				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
1823 			}
1824 			break;
1825 		default:
1826 			abort();
1827 			/*NOTREACHED*/
1828 		}
1829 		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
1830 
1831 		/*
1832 		 * If we started up with a writable filesystem, but the
1833 		 * non-persistent database needed initialization, we
1834 		 * are booting a non-global zone, so do a backup.
1835 		 */
1836 		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
1837 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
1838 		    REP_PROTOCOL_SUCCESS) {
1839 			if (backend_create_backup_locked(be,
1840 			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
1841 				configd_critical(
1842 				    "unable to create \"%s\" backup of "
1843 				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
1844 				    be->be_path);
1845 			}
1846 			backend_unlock(be);
1847 		}
1848 	}
1849 	return (CONFIGD_EXIT_OKAY);
1850 }
1851 
1852 /*
1853  * quiesce all database activity prior to exiting
1854  */
1855 void
1856 backend_fini(void)
1857 {
1858 	sqlite_backend_t *be_normal, *be_np;
1859 
1860 	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
1861 	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
1862 }
1863 
1864 #define	QUERY_BASE	128
1865 backend_query_t *
1866 backend_query_alloc(void)
1867 {
1868 	backend_query_t *q;
1869 	q = calloc(1, sizeof (backend_query_t));
1870 	if (q != NULL) {
1871 		q->bq_size = QUERY_BASE;
1872 		q->bq_buf = calloc(1, q->bq_size);
1873 		if (q->bq_buf == NULL) {
1874 			q->bq_size = 0;
1875 		}
1876 
1877 	}
1878 	return (q);
1879 }
1880 
1881 void
1882 backend_query_append(backend_query_t *q, const char *value)
1883 {
1884 	char *alloc;
1885 	int count;
1886 	size_t size, old_len;
1887 
1888 	if (q == NULL) {
1889 		/* We'll discover the error when we try to run the query. */
1890 		return;
1891 	}
1892 
1893 	while (q->bq_buf != NULL) {
1894 		old_len = strlen(q->bq_buf);
1895 		size = q->bq_size;
1896 		count = strlcat(q->bq_buf, value, size);
1897 
1898 		if (count < size)
1899 			break;				/* success */
1900 
1901 		q->bq_buf[old_len] = 0;
1902 		size = round_up_to_p2(count + 1);
1903 
1904 		assert(size > q->bq_size);
1905 		alloc = realloc(q->bq_buf, size);
1906 		if (alloc == NULL) {
1907 			free(q->bq_buf);
1908 			q->bq_buf = NULL;
1909 			break;				/* can't grow */
1910 		}
1911 
1912 		q->bq_buf = alloc;
1913 		q->bq_size = size;
1914 	}
1915 }
1916 
1917 void
1918 backend_query_add(backend_query_t *q, const char *format, ...)
1919 {
1920 	va_list args;
1921 	char *new;
1922 
1923 	if (q == NULL || q->bq_buf == NULL)
1924 		return;
1925 
1926 	va_start(args, format);
1927 	new = sqlite_vmprintf(format, args);
1928 	va_end(args);
1929 
1930 	if (new == NULL) {
1931 		free(q->bq_buf);
1932 		q->bq_buf = NULL;
1933 		return;
1934 	}
1935 
1936 	backend_query_append(q, new);
1937 
1938 	free(new);
1939 }
1940 
1941 void
1942 backend_query_free(backend_query_t *q)
1943 {
1944 	if (q != NULL) {
1945 		if (q->bq_buf != NULL) {
1946 			free(q->bq_buf);
1947 		}
1948 		free(q);
1949 	}
1950 }
1951