xref: /titanic_50/usr/src/cmd/svc/configd/backend.c (revision 3d729aecc03ea6ebb9bd5d56b8dccd24f57daa41)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
29  * be able to statvfs(2) possibly large systems.  This define gives us
30  * access to the transitional interfaces.  See lfcompile64(5) for how
31  * _LARGEFILE64_SOURCE works.
32  */
33 #define	_LARGEFILE64_SOURCE
34 
35 #include <assert.h>
36 #include <atomic.h>
37 #include <door.h>
38 #include <dirent.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <limits.h>
42 #include <pthread.h>
43 #include <stdarg.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <sys/stat.h>
49 #include <sys/statvfs.h>
50 #include <time.h>
51 #include <unistd.h>
52 #include <zone.h>
53 #include <libscf_priv.h>
54 
55 #include "configd.h"
56 #include "repcache_protocol.h"
57 
58 #include <sqlite.h>
59 #include <sqlite-misc.h>
60 
61 /*
62  * This file has two purposes:
63  *
64  * 1. It contains the database schema, and the code for setting up our backend
65  *    databases, including installing said schema.
66  *
67  * 2. It provides a simplified interface to the SQL database library, and
68  *    synchronizes MT access to the database.
69  */
70 
/*
 * True when the backend currently lives on volatile storage, i.e. when the
 * path of the permanent repository has been saved in be_ppath.
 */
#define	IS_VOLATILE(be)		((be)->be_ppath != NULL)
/* Number of slots in the flight_recorder[] array. */
#define	MAX_FLIGHT_RECORDER_EVENTS	100
73 
/*
 * Result codes for switching repositories.
 * NOTE(review): the code that produces these values is outside this excerpt;
 * the per-value notes are inferred from the names -- confirm against
 * backend_switch().
 */
typedef enum backend_switch_results {
	BACKEND_SWITCH_FATAL =	-1,	/* presumably: unrecoverable failure */
	BACKEND_SWITCH_OK =	0,	/* presumably: switch succeeded */
	BACKEND_SWITCH_RO		/* presumably: target is read-only */
} backend_switch_results_t;
79 
/*
 * Accumulated cost of one kind of backend activity.  Updated by
 * UPDATE_TOTALS_WR(), which bumps bs_count once per sample and adds the
 * elapsed gethrtime()/gethrvtime() deltas to bs_time/bs_vtime.
 */
typedef struct backend_spent {
	uint64_t bs_count;	/* number of samples recorded */
	hrtime_t bs_time;	/* sum of gethrtime() deltas */
	hrtime_t bs_vtime;	/* sum of gethrvtime() deltas */
} backend_spent_t;

/*
 * Per-backend statistics.  A sqlite_backend_t keeps two of these, one for
 * reading and one for writing (see be_totals).
 */
typedef struct backend_totals {
	backend_spent_t	bt_lock;	/* waiting for lock */
	backend_spent_t	bt_exec;	/* time spent executing SQL */
} backend_totals_t;
90 
/*
 * Per-database state for one backend.
 *
 * There are times when svcadm asks configd to move the BACKEND_TYPE_NORMAL
 * repository to volatile storage.  See backend_switch().  When the
 * repository is on volatile storage, we save the location of the permanent
 * repository in be_ppath.  We use the saved path when the time comes to
 * move the repository back.  When the repository is on permanent storage,
 * be_ppath is set to NULL.  Also see the definition of IS_VOLATILE() above
 * for testing if the repository is on volatile storage.
 */
typedef struct sqlite_backend {
	pthread_mutex_t	be_lock;	/* the backend lock */
	pthread_t	be_thread;	/* thread holding lock */
	struct sqlite	*be_db;		/* sqlite database handle */
	const char	*be_path;	/* path to db */
	const char	*be_ppath;	/* saved path to persistent db when */
					/* backend is volatile */
	const char	*be_checkpoint;	/* path to repository checkpoint */
	int		be_readonly;	/* readonly at start, and still is */
	int		be_writing;	/* held for writing */
	backend_type_t	be_type;	/* type of db */
	hrtime_t	be_lastcheck;	/* time of last read-only check */
	backend_totals_t be_totals[2];	/* one for reading, one for writing */
					/* ([0] = reading, [1] = writing; */
					/* see UPDATE_TOTALS_WR()) */
} sqlite_backend_t;
114 
/*
 * State of one backend transaction.
 * NOTE(review): the code that fills in these fields is outside this
 * excerpt; field notes other than bt_full are inferred from names and from
 * backend_is_upgraded(), which compares bt_type against
 * BACKEND_TYPE_NONPERSIST.
 */
struct backend_tx {
	sqlite_backend_t	*bt_be;		/* backend the tx runs on */
	int			bt_readonly;	/* presumably: read-only tx */
	int			bt_type;	/* backend type (BACKEND_TYPE_*) */
	int			bt_full;	/* SQLITE_FULL during tx */
};
121 
/*
 * Record one sample in a backend's statistics.
 *
 *	sb	 pointer to the sqlite_backend_t
 *	writing	 non-zero selects the "writing" totals (be_totals[1]), zero
 *		 the "reading" totals (be_totals[0])
 *	field	 which backend_spent_t member of backend_totals_t to update
 *		 (bt_lock or bt_exec)
 *	ts, vts	 gethrtime()/gethrvtime() values taken when the activity
 *		 started; the elapsed time is added to the running sums
 *
 * The time arguments are parenthesized so that an expression argument is
 * subtracted as a whole.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
	__bsp->bs_count++;						\
	__bsp->bs_time += (gethrtime() - (ts));				\
	__bsp->bs_vtime += (gethrvtime() - (vts));			\
}

/* As UPDATE_TOTALS_WR(), selecting the totals by the backend's be_writing. */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
131 
/*
 * A query under construction.
 * NOTE(review): the users of this structure are outside this excerpt;
 * presumably bq_buf is the query text buffer and bq_size its size in
 * bytes -- confirm.
 */
struct backend_query {
	char	*bq_buf;
	size_t	bq_size;
};
136 
/*
 * Description of one table in the schema; see tbls_normal[], tbls_np[] and
 * tbls_common[].
 */
struct backend_tbl_info {
	const char *bti_name;	/* table name */
	const char *bti_cols;	/* comma-separated column definitions */
};
141 
/*
 * Description of one index in the schema; see idxs_normal[], idxs_np[] and
 * idxs_common[].
 */
struct backend_idx_info {
	const char *bxi_tbl;	/* table the index is built on */
	const char *bxi_idx;	/* name tag of the index */
	const char *bxi_cols;	/* comma-separated columns indexed */
};
147 
/* Definitions for the flight recorder: */

/*
 * The kind of event being recorded; stored in be_flight_event_t.bfe_type.
 * BE_FLIGHT_EV_NOEVENT is 0, which is also the value of a slot that has
 * never been written.
 */
typedef enum be_flight_type {
	BE_FLIGHT_EV_NOEVENT = 0,	/* No event yet recorded. */
	BE_FLIGHT_EV_BACKUP,		/* Information about repo. backup */
	BE_FLIGHT_EV_BACKUP_ENTER,	/* Enter */
					/* backend_create_backup_locked() */
	BE_FLIGHT_EV_CHECKPOINT,	/* Request to checkpoint repository */
					/* for boot time backup */
	BE_FLIGHT_EV_CHECKPOINT_EXISTS,	/* Existing checkpoint detected on */
					/* restart */
	BE_FLIGHT_EV_LINGERING_FAST,	/* Use lingering fast repository */
	BE_FLIGHT_EV_NO_BACKUP,		/* Requested backup not made */
	BE_FLIGHT_EV_REPO_CREATE,	/* Main repository created */
	BE_FLIGHT_EV_RESTART,		/* This is a restart of configd */
	BE_FLIGHT_EV_SWITCH,		/* Switch repositories */
	BE_FLIGHT_EV_TRANS_RW		/* Root transitioned to read/write */
} be_flight_type_t;
166 
/*
 * Status or qualifier accompanying a flight recorder event; stored in
 * be_flight_event_t.bfe_status.
 */
typedef enum be_flight_status {
	BE_FLIGHT_ST_INFO = 0,		/* No status.  Event is informative */
	BE_FLIGHT_ST_BOOT_BACKUP,	/* Boot time backup */
	BE_FLIGHT_ST_CHECKPOINT_BACKUP,	/* Backup from checkpoint */
	BE_FLIGHT_ST_CLIENT,		/* Request from client as opposed to */
					/* internal call */
	BE_FLIGHT_ST_DUPLICATE,		/* Backup duplicates existing one */
	BE_FLIGHT_ST_FAIL,		/* Operation failed. */
	BE_FLIGHT_ST_FAST,		/* Fast repository (tmpfs) */
	BE_FLIGHT_ST_MI_BACKUP,		/* Manifest-import backup */
	BE_FLIGHT_ST_NO_SWITCH,		/* Don't switch repositories */
	BE_FLIGHT_ST_OTHER_BACKUP,	/* Other type of backup */
	BE_FLIGHT_ST_PERMANENT,		/* Repository on permanent storage */
	BE_FLIGHT_ST_REPO_BACKUP,	/* Backup from repository */
	BE_FLIGHT_ST_RO,		/* Main repository is read-only */
	BE_FLIGHT_ST_RW,		/* Main repository is read/write */
	BE_FLIGHT_ST_SUCCESS,		/* Operation was successful */
	BE_FLIGHT_ST_SWITCH		/* Switch repository */
} be_flight_status_t;
186 
/*
 * One entry in the flight recorder.  Entries are filled in by
 * flight_recorder_event().
 */
typedef struct be_flight_event {
	be_flight_type_t	bfe_type;	/* Type of event. */
	be_flight_status_t	bfe_status;	/* Result of the event. */
	time_t			bfe_time;	/* Time of the event. */
	uint_t			bfe_sequence;	/* Sequence number. */
} be_flight_event_t;
193 
/*
 * Panic state; see backend_panic().  backend_panic_thread is non-zero once
 * a panic is in progress and identifies the panicking thread.  Threads that
 * call backend_panic() after that block on backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */

/*
 * Data for the flight_recorder.  flight_recorder_next and
 * flight_recorder_sequence are updated under backend_flight_recorder_lock;
 * flight_recorder_missed is updated with atomic_inc_uint().
 */

static pthread_mutex_t backend_flight_recorder_lock = PTHREAD_MUTEX_INITIALIZER;
static be_flight_event_t flight_recorder[MAX_FLIGHT_RECORDER_EVENTS];
static uint_t flight_recorder_next = 0;		/* next free slot */
static uint_t flight_recorder_missed = 0;	/* events not recorded */
static uint_t flight_recorder_sequence = 0;	/* next sequence number */
209 
/*
 * interval between read-only checks while starting up, as an hrtime_t
 * (compare be_lastcheck in sqlite_backend_t)
 */
#define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)

/*
 * Any incompatible change to the below schema should bump the version number.
 * The schema has been changed to support value ordering,  but this change
 * is backwards-compatible - i.e. a previous svc.configd can use a
 * repository database with the new schema perfectly well.  As a result,
 * the schema version has not been updated,  allowing downgrade of systems
 * without losing repository data.
 */
#define	BACKEND_SCHEMA_VERSION		5
222 
/*
 * Tables that exist only in the BACKEND_TYPE_NORMAL database.  Each entry
 * pairs a table name with its column definitions; the list is terminated by
 * a { NULL, NULL } entry.
 */
static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
	/*
	 * service_tbl holds all services.  svc_id is the identifier of the
	 * service.
	 */
	{
		"service_tbl",
		"svc_id          INTEGER PRIMARY KEY,"
		"svc_name        CHAR(256) NOT NULL"
	},

	/*
	 * instance_tbl holds all of the instances.  The parent service id
	 * is instance_svc.
	 */
	{
		"instance_tbl",
		"instance_id     INTEGER PRIMARY KEY,"
		"instance_name   CHAR(256) NOT NULL,"
		"instance_svc    INTEGER NOT NULL"
	},

	/*
	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
	 */
	{
		"snapshot_lnk_tbl",
		"lnk_id          INTEGER PRIMARY KEY,"
		"lnk_inst_id     INTEGER NOT NULL,"
		"lnk_snap_name   CHAR(256) NOT NULL,"
		"lnk_snap_id     INTEGER NOT NULL"
	},

	/*
	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
	 * snaplevels.
	 */
	{
		"snaplevel_tbl",
		"snap_id                 INTEGER NOT NULL,"
		"snap_level_num          INTEGER NOT NULL,"
		"snap_level_id           INTEGER NOT NULL,"
		"snap_level_service_id   INTEGER NOT NULL,"
		"snap_level_service      CHAR(256) NOT NULL,"
		"snap_level_instance_id  INTEGER NULL,"
		"snap_level_instance     CHAR(256) NULL"
	},

	/*
	 * snaplevel_lnk_tbl links snaplevels to property groups.
	 * snaplvl_pg_* is identical to the original property group,
	 * and snaplvl_gen_id overrides the generation number.
	 * The service/instance ids are as in the snaplevel.
	 */
	{
		"snaplevel_lnk_tbl",
		"snaplvl_level_id INTEGER NOT NULL,"
		"snaplvl_pg_id    INTEGER NOT NULL,"
		"snaplvl_pg_name  CHAR(256) NOT NULL,"
		"snaplvl_pg_type  CHAR(256) NOT NULL,"
		"snaplvl_pg_flags INTEGER NOT NULL,"
		"snaplvl_gen_id   INTEGER NOT NULL"
	},

	{ NULL, NULL }
};
289 
/*
 * Indexes that exist only in the BACKEND_TYPE_NORMAL database.  Each entry
 * is (table, index name tag, indexed columns); the list is terminated by an
 * all-NULL entry.
 */
static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
	{ "service_tbl",	"name",	"svc_name" },
	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
	{ "snaplevel_tbl",	"id",	"snap_id" },
	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
	{ NULL, NULL, NULL }
};
300 
/*
 * The BACKEND_TYPE_NONPERSIST database has no tables or indexes of its own;
 * both lists contain only the terminating entry.
 */
static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL }
};

static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL, NULL }
};
308 
/*
 * Tables present in every backend type.  Each entry pairs a table name with
 * its column definitions; the list is terminated by a { NULL, NULL } entry.
 */
static struct backend_tbl_info tbls_common[] = { /* all backend types */
	/*
	 * pg_tbl defines property groups.  They are associated with a single
	 * service or instance.  The pg_gen_id links them with the latest
	 * "edited" version of its properties.
	 */
	{
		"pg_tbl",
		"pg_id           INTEGER PRIMARY KEY,"
		"pg_parent_id    INTEGER NOT NULL,"
		"pg_name         CHAR(256) NOT NULL,"
		"pg_type         CHAR(256) NOT NULL,"
		"pg_flags        INTEGER NOT NULL,"
		"pg_gen_id       INTEGER NOT NULL"
	},

	/*
	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
	 * (prop_name, prop_type, val_id) trios.
	 */
	{
		"prop_lnk_tbl",
		"lnk_prop_id     INTEGER PRIMARY KEY,"
		"lnk_pg_id       INTEGER NOT NULL,"
		"lnk_gen_id      INTEGER NOT NULL,"
		"lnk_prop_name   CHAR(256) NOT NULL,"
		"lnk_prop_type   CHAR(2) NOT NULL,"
		"lnk_val_id      INTEGER"
	},

	/*
	 * value_tbl maps a value_id to a set of values.  For any given
	 * value_id, value_type is constant.  The table definition here
	 * is repeated in backend_check_upgrade(),  and must be kept in-sync.
	 */
	{
		"value_tbl",
		"value_id        INTEGER NOT NULL,"
		"value_type      CHAR(1) NOT NULL,"
		"value_value     VARCHAR NOT NULL,"
		"value_order     INTEGER DEFAULT 0"
	},

	/*
	 * id_tbl has one row per id space
	 */
	{
		"id_tbl",
		"id_name         STRING NOT NULL,"
		"id_next         INTEGER NOT NULL"
	},

	/*
	 * schema_version has a single row, which contains
	 * BACKEND_SCHEMA_VERSION at the time of creation.
	 */
	{
		"schema_version",
		"schema_version  INTEGER"
	},
	{ NULL, NULL }
};
371 
/*
 * Indexes present in every backend type.  Each entry is (table, index name
 * tag, indexed columns); the list is terminated by an all-NULL entry.
 *
 * The indexing of value_tbl is repeated in backend_check_upgrade() and
 * must be kept in sync with the indexing specification here.
 */
static struct backend_idx_info idxs_common[] = { /* all backend types */
	{ "pg_tbl",		"parent", "pg_parent_id" },
	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
	{ "value_tbl",		"id",	"value_id" },
	{ "id_tbl",		"id",	"id_name" },
	{ NULL, NULL, NULL }
};
386 
/*
 * Argument block for run_single_int_callback().
 */
struct run_single_int_info {
	uint32_t	*rs_out;	/* where the parsed integer is stored */
	int		rs_result;	/* set to REP_PROTOCOL_SUCCESS once */
					/* a value has been stored */
};
391 
/*
 * Forward declarations for the repository copy routines, which are used
 * before their definitions.
 */
static rep_protocol_responseid_t backend_copy_repository(const char *,
    const char *, int);
static rep_protocol_responseid_t backend_do_copy(const char *, int,
    const char *, int, size_t *);
396 
397 /*
398  * The flight recorder keeps track of events that happen primarily while
399  * the system is booting.  Once the system is up an running, one can take a
400  * gcore(1) of configd and examine the events with mdb.  Since we're most
401  * interested in early boot events, we stop recording events when the
402  * recorder is full.
403  */
404 static void
405 flight_recorder_event(be_flight_type_t type, be_flight_status_t res)
406 {
407 	be_flight_event_t *data;
408 	uint_t item;
409 	uint_t sequence;
410 
411 	if (pthread_mutex_lock(&backend_flight_recorder_lock) != 0) {
412 		atomic_inc_uint(&flight_recorder_missed);
413 		return;
414 	}
415 	if (flight_recorder_next >= MAX_FLIGHT_RECORDER_EVENTS) {
416 		/* Hit end of the array.  No more event recording. */
417 		item = flight_recorder_next;
418 	} else {
419 		item = flight_recorder_next++;
420 		sequence = flight_recorder_sequence++;
421 	}
422 	(void) pthread_mutex_unlock(&backend_flight_recorder_lock);
423 
424 	if (item >= MAX_FLIGHT_RECORDER_EVENTS) {
425 		/* Array is filled.  Stop recording events */
426 		atomic_inc_uint(&flight_recorder_missed);
427 		return;
428 	}
429 	data = &flight_recorder[item];
430 	(void) memset(data, 0, sizeof (*data));
431 	data->bfe_type = type;
432 	data->bfe_status = res;
433 	data->bfe_sequence = sequence;
434 	data->bfe_time = time(NULL);
435 }
436 
437 /*ARGSUSED*/
438 static int
439 run_single_int_callback(void *arg, int columns, char **vals, char **names)
440 {
441 	struct run_single_int_info *info = arg;
442 	uint32_t val;
443 
444 	char *endptr = vals[0];
445 
446 	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
447 	assert(columns == 1);
448 
449 	if (vals[0] == NULL)
450 		return (BACKEND_CALLBACK_CONTINUE);
451 
452 	errno = 0;
453 	val = strtoul(vals[0], &endptr, 10);
454 	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
455 		backend_panic("malformed integer \"%20s\"", vals[0]);
456 
457 	*info->rs_out = val;
458 	info->rs_result = REP_PROTOCOL_SUCCESS;
459 	return (BACKEND_CALLBACK_CONTINUE);
460 }
461 
462 /*ARGSUSED*/
463 int
464 backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
465 {
466 	return (BACKEND_CALLBACK_ABORT);
467 }
468 
469 /*
470  * check to see if we can successfully start a transaction;  if not, the
471  * filesystem is mounted read-only.
472  */
473 static int
474 backend_is_readonly(struct sqlite *db, const char *path)
475 {
476 	int r;
477 	statvfs64_t stat;
478 
479 	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
480 		return (SQLITE_READONLY);
481 
482 	r = sqlite_exec(db,
483 	    "BEGIN TRANSACTION; "
484 	    "UPDATE schema_version SET schema_version = schema_version; ",
485 	    NULL, NULL, NULL);
486 	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
487 	return (r);
488 }
489 
490 static void
491 backend_trace_sql(void *arg, const char *sql)
492 {
493 	sqlite_backend_t *be = arg;
494 
495 	if (backend_print_trace) {
496 		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
497 	}
498 }
499 
/*
 * Storage for the backends, and the table of backends indexed by backend
 * type.  A NULL entry in bes[] is treated as "no such backend" by
 * backend_panic().
 */
static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];

/*
 * For a native build,  repositories are created from scratch, so upgrade
 * is not an issue.  This variable is implicitly protected by
 * bes[BACKEND_TYPE_NORMAL]->be_lock.
 */
#ifdef NATIVE_BUILD
static boolean_t be_normal_upgraded = B_TRUE;
#else
static boolean_t be_normal_upgraded = B_FALSE;
#endif	/* NATIVE_BUILD */
513 
514 /*
515  * Has backend been upgraded? In nonpersistent case, answer is always
516  * yes.
517  */
518 boolean_t
519 backend_is_upgraded(backend_tx_t *bt)
520 {
521 	if (bt->bt_type == BACKEND_TYPE_NONPERSIST)
522 		return (B_TRUE);
523 	return (be_normal_upgraded);
524 }
525 
526 #define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
527 /*
528  * backend_panic() -- some kind of database problem or corruption has been hit.
529  * We attempt to quiesce the other database users -- all of the backend sql
530  * entry points will call backend_panic(NULL) if a panic is in progress, as
531  * will any attempt to start a transaction.
532  *
533  * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
534  * either drop the lock or call backend_panic().  If they don't respond in
535  * time, we'll just exit anyway.
536  */
537 void
538 backend_panic(const char *format, ...)
539 {
540 	int i;
541 	va_list args;
542 	int failed = 0;
543 
544 	(void) pthread_mutex_lock(&backend_panic_lock);
545 	if (backend_panic_thread != 0) {
546 		(void) pthread_mutex_unlock(&backend_panic_lock);
547 		/*
548 		 * first, drop any backend locks we're holding, then
549 		 * sleep forever on the panic_cv.
550 		 */
551 		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
552 			if (bes[i] != NULL &&
553 			    bes[i]->be_thread == pthread_self())
554 				(void) pthread_mutex_unlock(&bes[i]->be_lock);
555 		}
556 		(void) pthread_mutex_lock(&backend_panic_lock);
557 		for (;;)
558 			(void) pthread_cond_wait(&backend_panic_cv,
559 			    &backend_panic_lock);
560 	}
561 	backend_panic_thread = pthread_self();
562 	(void) pthread_mutex_unlock(&backend_panic_lock);
563 
564 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
565 		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
566 			(void) pthread_mutex_unlock(&bes[i]->be_lock);
567 	}
568 
569 	va_start(args, format);
570 	configd_vcritical(format, args);
571 	va_end(args);
572 
573 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
574 		timespec_t rel;
575 
576 		rel.tv_sec = 0;
577 		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
578 
579 		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
580 			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
581 			    &rel) != 0)
582 				failed++;
583 		}
584 	}
585 	if (failed) {
586 		configd_critical("unable to quiesce database\n");
587 	}
588 
589 	if (backend_panic_abort)
590 		abort();
591 
592 	exit(CONFIGD_EXIT_DATABASE_BAD);
593 }
594 
595 /*
596  * Returns
597  *   _SUCCESS
598  *   _DONE - callback aborted query
599  *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
600  */
601 static int
602 backend_error(sqlite_backend_t *be, int error, char *errmsg)
603 {
604 	if (error == SQLITE_OK)
605 		return (REP_PROTOCOL_SUCCESS);
606 
607 	switch (error) {
608 	case SQLITE_ABORT:
609 		free(errmsg);
610 		return (REP_PROTOCOL_DONE);
611 
612 	case SQLITE_NOMEM:
613 	case SQLITE_FULL:
614 	case SQLITE_TOOBIG:
615 		free(errmsg);
616 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
617 
618 	default:
619 		backend_panic("%s: db error: %s", be->be_path, errmsg);
620 		/*NOTREACHED*/
621 	}
622 }
623 
/*
 * Free a list of out_sz strings: each of the first out_sz entries of
 * out_arg is freed, followed by the array itself.  Nothing is dereferenced
 * when out_sz is zero or negative, so a NULL list with a zero count is
 * acceptable.
 */
static void
backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
{
	char **list = (char **)out_arg;
	ssize_t i;

	for (i = 0; i < out_sz; i++)
		free(list[i]);

	free(list);
}
633 
/*
 * Builds an inverse-time-sorted (newest first) array of the names of
 * existing backup files.  The path is a single buffer, and the pointers
 * look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * Only the first pathlen bytes of pathname are considered.  The buffer is
 * modified temporarily while we work, and the modified bytes are put back
 * before returning.
 *
 * dirname will either be pathname, or ".".
 *
 * On success, *out_arg is set to the array; each element is a strdup()ed
 * directory entry name (not a full path).  On failure, anything allocated
 * so far is freed and *out_arg is set to NULL.
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error.
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char b_start, b_end;	/* saved bytes overwritten by '\0' */
	DIR *dir;
	char **out = NULL;
	char *name, *p;
	char *dirname, *basename;
	char *pathend;
	struct dirent *ent;

	size_t count = 0;	/* number of entries in out[] */
	size_t baselen;

	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;	/* index of the '_' */

	size_t idx;

	/* Terminate the path at pathlen, remembering what was there. */
	pathend = pathname + pathlen;
	b_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');

	if (basename != NULL) {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	} else {
		basename = pathname;
		dirname = ".";
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	b_start = basename[0];

	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = b_start;		/* restore path */

	if (dir == NULL)
		goto fail;


	while ((ent = readdir(dir)) != NULL) {
		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0)
			continue;

		name = ent->d_name;
		if (name[baselen] != '-')
			continue;

		p = name + baselen + 1;

		/* Every position is a digit, except for the '_' separator. */
		for (idx = 0; idx < ndigits; idx++) {
			char c = p[idx];
			if (idx == baroffset && c != '_')
				break;
			if (idx != baroffset && (c < '0' || c > '9'))
				break;
		}
		/* Reject a short timestamp or anything trailing it. */
		if (idx != ndigits || p[idx] != '\0')
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		name = strdup(name);
		if (name == NULL)
			goto fail_closedir;
		p = strrchr(name, '-');

		/*
		 * Walk the list carrying "name".  Whenever the carried entry
		 * sorts after the one in the current slot, they trade places
		 * and the displaced entry is carried onward.  Entries are
		 * ordered by timestamp suffix first, then by whole name.
		 */
		for (idx = 0; idx < count; idx++) {
			char *tmp = out[idx];
			char *tp = strrchr(tmp, '-');

			int cmp = strcmp(p, tp);
			if (cmp == 0)
				cmp = strcmp(name, tmp);

			if (cmp == 0) {
				/* Already in the list; drop the duplicate. */
				free(name);
				name = NULL;
				break;
			} else if (cmp > 0) {
				out[idx] = name;
				name = tmp;
				p = tp;
			}
		}

		if (idx == count) {
			/* Append whatever is still being carried. */
			char **new_out = realloc(out,
			    (count + 1) * sizeof (*out));

			if (new_out == NULL) {
				free(name);
				goto fail_closedir;
			}

			out = new_out;
			out[count++] = name;
		} else {
			assert(name == NULL);
		}
	}
	(void) closedir(dir);

	/* Put back the byte that was replaced by the terminator. */
	basename[baselen] = b_end;

	*out_arg = (const char **)out;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	/* Restore the caller's buffer and release anything we allocated. */
	basename[0] = b_start;
	*pathend = b_end;

	backend_backup_cleanup((const char **)out, count);

	*out_arg = NULL;
	return (-1);
}
784 
785 /*
786  * Copies the repository path into out, a buffer of out_len bytes,
787  * removes the ".db" (or whatever) extension, and, if name is non-NULL,
788  * appends "-name" to it.  If name is non-NULL, it can fail with:
789  *
790  *	_TRUNCATED	will not fit in buffer.
791  *	_BAD_REQUEST	name is not a valid identifier
792  */
793 static rep_protocol_responseid_t
794 backend_backup_base(sqlite_backend_t *be, const char *name,
795     char *out, size_t out_len)
796 {
797 	char *p, *q;
798 	size_t len;
799 
800 	/*
801 	 * for paths of the form /path/to/foo.db, we truncate at the final
802 	 * '.'.
803 	 */
804 	(void) strlcpy(out, IS_VOLATILE(be) ? be->be_ppath : be->be_path,
805 	    out_len);
806 
807 	p = strrchr(out, '/');
808 	q = strrchr(out, '.');
809 
810 	if (p != NULL && q != NULL && q > p)
811 		*q = 0;
812 
813 	if (name != NULL) {
814 		len = strlen(out);
815 		assert(len < out_len);
816 
817 		out += len;
818 		out_len -= len;
819 
820 		len = strlen(name);
821 
822 		/*
823 		 * verify that the name tag is entirely alphabetic,
824 		 * non-empty, and not too long.
825 		 */
826 		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
827 		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
828 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
829 
830 		if (snprintf(out, out_len, "-%s", name) >= out_len)
831 			return (REP_PROTOCOL_FAIL_TRUNCATED);
832 	}
833 
834 	return (REP_PROTOCOL_SUCCESS);
835 }
836 
837 /*
838  * Make a checkpoint of the repository, so that we can use it for a backup
839  * when the root file system becomes read/write.  We'll first copy the
840  * repository into a temporary file and then rename it to
841  * REPOSITORY_CHECKPOINT.  This is protection against configd crashing in
842  * the middle of the copy and leaving a partial copy at
843  * REPOSITORY_CHECKPOINT.  Renames are atomic.
844  */
845 static rep_protocol_responseid_t
846 backend_checkpoint_repository(sqlite_backend_t *be)
847 {
848 	rep_protocol_responseid_t r;
849 
850 	assert(be->be_readonly);	/* Only need a checkpoint if / is ro */
851 	assert(be->be_type == BACKEND_TYPE_NORMAL);
852 	assert(be->be_checkpoint == NULL); /* Only 1 checkpoint */
853 
854 	r = backend_copy_repository(be->be_path, REPOSITORY_CHECKPOINT, 0);
855 	if (r == REP_PROTOCOL_SUCCESS)
856 		be->be_checkpoint = REPOSITORY_CHECKPOINT;
857 
858 	flight_recorder_event(BE_FLIGHT_EV_CHECKPOINT,
859 	    r == REP_PROTOCOL_SUCCESS ? BE_FLIGHT_ST_SUCCESS :
860 	    BE_FLIGHT_ST_FAIL);
861 
862 	return (r);
863 }
864 
/*
 * See if a backup is needed.  We do a backup unless both files are
 * byte-for-byte identical.
 *
 * rep_name is the repository (or checkpoint) to be backed up, and
 * backup_name is the existing backup to compare it against.
 *
 * Returns 0 if the two are distinct files with identical contents, and 1
 * (backup needed) otherwise -- including when either file cannot be opened,
 * examined or read.
 */
static int
backend_check_backup_needed(const char *rep_name, const char *backup_name)
{
	int repfd = open(rep_name, O_RDONLY);
	int fd = open(backup_name, O_RDONLY);
	struct stat s_rep, s_backup;
	int c1, c2;
	int needed = 1;

	FILE *f_rep = NULL;
	FILE *f_backup = NULL;

	if (repfd < 0 || fd < 0)
		goto out;

	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
		goto out;

	/*
	 * if they are the same file, we need to do a backup to break the
	 * hard link or symlink involved.
	 */
	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
		goto out;

	if (s_rep.st_size != s_backup.st_size)
		goto out;

	/*
	 * Once fdopen() succeeds, the stream owns the descriptor and
	 * fclose() will close it.  Forget our copy so that the cleanup below
	 * doesn't close the same descriptor number a second time; by then it
	 * could already belong to another thread.
	 */
	if ((f_rep = fdopen(repfd, "r")) == NULL)
		goto out;
	repfd = -1;

	if ((f_backup = fdopen(fd, "r")) == NULL)
		goto out;
	fd = -1;

	do {
		c1 = getc(f_rep);
		c2 = getc(f_backup);
		if (c1 != c2)
			goto out;
	} while (c1 != EOF);

	/* Both hit EOF together; make sure that wasn't due to an error. */
	if (!ferror(f_rep) && !ferror(f_backup))
		needed = 0;

out:
	if (f_rep != NULL)
		(void) fclose(f_rep);
	if (f_backup != NULL)
		(void) fclose(f_backup);
	if (repfd >= 0)
		(void) close(repfd);
	if (fd >= 0)
		(void) close(fd);
	return (needed);
}
926 
927 /*
928  * This interface is called to perform the actual copy
929  *
930  * Return:
931  *	_FAIL_UNKNOWN		read/write fails
932  *	_FAIL_NO_RESOURCES	out of memory
933  *	_SUCCESS		copy succeeds
934  */
935 static rep_protocol_responseid_t
936 backend_do_copy(const char *src, int srcfd, const char *dst,
937     int dstfd, size_t *sz)
938 {
939 	char *buf;
940 	off_t nrd, nwr, n, r_off = 0, w_off = 0;
941 
942 	if ((buf = malloc(8192)) == NULL)
943 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
944 
945 	while ((nrd = read(srcfd, buf, 8192)) != 0) {
946 		if (nrd < 0) {
947 			if (errno == EINTR)
948 				continue;
949 
950 			configd_critical(
951 			    "Backend copy failed: fails to read from %s "
952 			    "at offset %d: %s\n", src, r_off, strerror(errno));
953 			free(buf);
954 			return (REP_PROTOCOL_FAIL_UNKNOWN);
955 		}
956 
957 		r_off += nrd;
958 
959 		nwr = 0;
960 		do {
961 			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
962 				if (errno == EINTR)
963 					continue;
964 
965 				configd_critical(
966 				    "Backend copy failed: fails to write to %s "
967 				    "at offset %d: %s\n", dst, w_off,
968 				    strerror(errno));
969 				free(buf);
970 				return (REP_PROTOCOL_FAIL_UNKNOWN);
971 			}
972 
973 			nwr += n;
974 			w_off += n;
975 
976 		} while (nwr < nrd);
977 	}
978 
979 	if (sz)
980 		*sz = w_off;
981 
982 	free(buf);
983 	return (REP_PROTOCOL_SUCCESS);
984 }
985 
986 /*
987  * Can return:
988  *	_BAD_REQUEST		name is not valid
989  *	_TRUNCATED		name is too long for current repository path
990  *	_UNKNOWN		failed for unknown reason (details written to
991  *				console)
992  *	_BACKEND_READONLY	backend is not writable
993  *	_NO_RESOURCES		out of memory
994  *	_SUCCESS		Backup completed successfully.
995  */
996 static rep_protocol_responseid_t
997 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
998 {
999 	const char **old_list;
1000 	ssize_t old_sz;
1001 	ssize_t old_max = max_repository_backups;
1002 	ssize_t cur;
1003 	char *finalname;
1004 	char *finalpath;
1005 	char *tmppath;
1006 	int infd, outfd;
1007 	size_t len;
1008 	time_t now;
1009 	struct tm now_tm;
1010 	be_flight_status_t backup_type;
1011 	rep_protocol_responseid_t result;
1012 	const char *src;
1013 	int use_checkpoint;
1014 
1015 	if (strcmp(name, REPOSITORY_BOOT_BACKUP) == 0) {
1016 		backup_type = BE_FLIGHT_ST_BOOT_BACKUP;
1017 	} else if (strcmp(name, "manifest_import") ==  0) {
1018 		backup_type = BE_FLIGHT_ST_MI_BACKUP;
1019 	} else {
1020 		backup_type = BE_FLIGHT_ST_OTHER_BACKUP;
1021 	}
1022 	flight_recorder_event(BE_FLIGHT_EV_BACKUP_ENTER, backup_type);
1023 
1024 	if ((finalpath = malloc(PATH_MAX)) == NULL)
1025 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1026 
1027 	if ((tmppath = malloc(PATH_MAX)) == NULL) {
1028 		free(finalpath);
1029 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1030 	}
1031 
1032 	if (be->be_readonly) {
1033 		flight_recorder_event(BE_FLIGHT_EV_NO_BACKUP, BE_FLIGHT_ST_RO);
1034 		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
1035 		goto out;
1036 	}
1037 
1038 	result = backend_backup_base(be, name, finalpath, PATH_MAX);
1039 	if (result != REP_PROTOCOL_SUCCESS)
1040 		goto out;
1041 
1042 	/*
1043 	 * If this is a boot backup and if we made a checkpoint before the
1044 	 * root file system became read/write, then we should use the
1045 	 * checkpoint as the source.  Otherwise, we'll use the actual
1046 	 * repository as the source.
1047 	 */
1048 	if (be->be_checkpoint && name &&
1049 	    strcmp(REPOSITORY_BOOT_BACKUP, name) == 0) {
1050 		backup_type = BE_FLIGHT_ST_CHECKPOINT_BACKUP;
1051 		use_checkpoint = 1;
1052 		src = be->be_checkpoint;
1053 	} else {
1054 		backup_type = BE_FLIGHT_ST_REPO_BACKUP;
1055 		use_checkpoint = 0;
1056 		src = be->be_path;
1057 	}
1058 	flight_recorder_event(BE_FLIGHT_EV_BACKUP, backup_type);
1059 	if (!backend_check_backup_needed(src, finalpath)) {
1060 		/*
1061 		 * No changes, so there is no need for a backup.
1062 		 */
1063 		flight_recorder_event(BE_FLIGHT_EV_NO_BACKUP,
1064 		    BE_FLIGHT_ST_DUPLICATE);
1065 		result = REP_PROTOCOL_SUCCESS;
1066 		goto out;
1067 	}
1068 
1069 	/*
1070 	 * remember the original length, and the basename location
1071 	 */
1072 	len = strlen(finalpath);
1073 	finalname = strrchr(finalpath, '/');
1074 	if (finalname != NULL)
1075 		finalname++;
1076 	else
1077 		finalname = finalpath;
1078 
1079 	(void) strlcpy(tmppath, finalpath, PATH_MAX);
1080 	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
1081 		result = REP_PROTOCOL_FAIL_TRUNCATED;
1082 		goto out;
1083 	}
1084 
1085 	now = time(NULL);
1086 	if (localtime_r(&now, &now_tm) == NULL) {
1087 		configd_critical(
1088 		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
1089 		    strerror(errno));
1090 		result = REP_PROTOCOL_FAIL_UNKNOWN;
1091 		goto out;
1092 	}
1093 
1094 	if (strftime(finalpath + len, PATH_MAX - len,
1095 	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
1096 		result = REP_PROTOCOL_FAIL_TRUNCATED;
1097 		goto out;
1098 	}
1099 
1100 	infd = open(src, O_RDONLY);
1101 	if (infd < 0) {
1102 		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
1103 		    src, strerror(errno));
1104 		result = REP_PROTOCOL_FAIL_UNKNOWN;
1105 		goto out;
1106 	}
1107 
1108 	outfd = mkstemp(tmppath);
1109 	if (outfd < 0) {
1110 		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
1111 		    name, tmppath, strerror(errno));
1112 		(void) close(infd);
1113 		result = REP_PROTOCOL_FAIL_UNKNOWN;
1114 		goto out;
1115 	}
1116 
1117 	if ((result = backend_do_copy(src, infd, (const char *)tmppath,
1118 	    outfd, NULL)) != REP_PROTOCOL_SUCCESS)
1119 		goto fail;
1120 
1121 	/*
1122 	 * grab the old list before doing our re-name.
1123 	 */
1124 	if (old_max > 0)
1125 		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
1126 
1127 	if (rename(tmppath, finalpath) < 0) {
1128 		configd_critical(
1129 		    "\"%s\" backup failed: rename(%s, %s): %s\n",
1130 		    name, tmppath, finalpath, strerror(errno));
1131 		result = REP_PROTOCOL_FAIL_UNKNOWN;
1132 		goto fail;
1133 	}
1134 
1135 	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
1136 
1137 	(void) unlink(tmppath);
1138 	if (symlink(finalname, tmppath) < 0) {
1139 		configd_critical(
1140 		    "\"%s\" backup completed, but updating "
1141 		    "\"%s\" symlink to \"%s\" failed: %s\n",
1142 		    name, tmppath, finalname, strerror(errno));
1143 	}
1144 
1145 	if (old_max > 0 && old_sz > 0) {
1146 		/* unlink all but the first (old_max - 1) files */
1147 		for (cur = old_max - 1; cur < old_sz; cur++) {
1148 			(void) strlcpy(finalname, old_list[cur],
1149 			    PATH_MAX - (finalname - finalpath));
1150 			if (unlink(finalpath) < 0)
1151 				configd_critical(
1152 				    "\"%s\" backup completed, but removing old "
1153 				    "file \"%s\" failed: %s\n",
1154 				    name, finalpath, strerror(errno));
1155 		}
1156 
1157 		backend_backup_cleanup(old_list, old_sz);
1158 	}
1159 
1160 	result = REP_PROTOCOL_SUCCESS;
1161 	flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_SUCCESS);
1162 
1163 fail:
1164 	(void) close(infd);
1165 	(void) close(outfd);
1166 	if (result != REP_PROTOCOL_SUCCESS) {
1167 		flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_FAIL);
1168 		(void) unlink(tmppath);
1169 	}
1170 
1171 out:
1172 	/* Get rid of the checkpoint file now that we've used it. */
1173 	if (use_checkpoint && (result == REP_PROTOCOL_SUCCESS)) {
1174 		(void) unlink(be->be_checkpoint);
1175 		be->be_checkpoint = NULL;
1176 	}
1177 	free(finalpath);
1178 	free(tmppath);
1179 
1180 	return (result);
1181 }
1182 
1183 /*
1184  * Check if value_tbl has been upgraded in the main database,  and
1185  * if not (if the value_order column is not present),  and do_upgrade is true,
1186  * upgrade value_tbl in repository to contain the additional value_order
1187  * column. The version of sqlite used means ALTER TABLE is not
1188  * available, so we cannot simply use "ALTER TABLE value_tbl ADD COLUMN".
1189  * Rather we need to create a temporary table with the additional column,
1190  * import the value_tbl, drop the original value_tbl, recreate the value_tbl
1191  * with the additional column, import the values from value_tbl_tmp,
1192  * reindex and finally drop value_tbl_tmp.  During boot, we wish to check
1193  * if the repository has been upgraded before it is writable,  so that
1194  * property value retrieval can use the appropriate form of the SELECT
1195  * statement that retrieves property values.  As a result, we need to check
1196  * if the repository has been upgraded prior to the point when we can
1197  * actually carry out the update.
1198  */
1199 void
1200 backend_check_upgrade(sqlite_backend_t *be, boolean_t do_upgrade)
1201 {
1202 	char *errp;
1203 	int r;
1204 
1205 	if (be_normal_upgraded)
1206 		return;
1207 	/*
1208 	 * Test if upgrade is needed. If value_order column does not exist,
1209 	 * we need to upgrade the schema.
1210 	 */
1211 	r = sqlite_exec(be->be_db, "SELECT value_order FROM value_tbl LIMIT 1;",
1212 	    NULL, NULL, NULL);
1213 	if (r == SQLITE_ERROR && do_upgrade) {
1214 		/* No value_order column - needs upgrade */
1215 		configd_info("Upgrading SMF repository format...");
1216 		r = sqlite_exec(be->be_db,
1217 		    "BEGIN TRANSACTION; "
1218 		    "CREATE TABLE value_tbl_tmp ( "
1219 		    "value_id   INTEGER NOT NULL, "
1220 		    "value_type CHAR(1) NOT NULL, "
1221 		    "value_value VARCHAR NOT NULL, "
1222 		    "value_order INTEGER DEFAULT 0); "
1223 		    "INSERT INTO value_tbl_tmp "
1224 		    "(value_id, value_type, value_value) "
1225 		    "SELECT value_id, value_type, value_value FROM value_tbl; "
1226 		    "DROP TABLE value_tbl; "
1227 		    "CREATE TABLE value_tbl( "
1228 		    "value_id   INTEGER NOT NULL, "
1229 		    "value_type CHAR(1) NOT NULL, "
1230 		    "value_value VARCHAR NOT NULL, "
1231 		    "value_order INTEGER DEFAULT 0); "
1232 		    "INSERT INTO value_tbl SELECT * FROM value_tbl_tmp; "
1233 		    "CREATE INDEX value_tbl_id ON value_tbl (value_id); "
1234 		    "DROP TABLE value_tbl_tmp; "
1235 		    "COMMIT TRANSACTION; "
1236 		    "VACUUM; ",
1237 		    NULL, NULL, &errp);
1238 		if (r == SQLITE_OK) {
1239 			configd_info("SMF repository upgrade is complete.");
1240 		} else {
1241 			backend_panic("%s: repository upgrade failed: %s",
1242 			    be->be_path, errp);
1243 			/* NOTREACHED */
1244 		}
1245 	}
1246 	if (r == SQLITE_OK)
1247 		be_normal_upgraded = B_TRUE;
1248 	else
1249 		be_normal_upgraded = B_FALSE;
1250 }
1251 
1252 static int
1253 backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
1254 {
1255 	const char *check_path;
1256 	char *errp;
1257 	struct sqlite *new;
1258 	int r;
1259 
1260 	assert(be->be_readonly);
1261 	assert(be == bes[BACKEND_TYPE_NORMAL]);
1262 
1263 	/*
1264 	 * If we don't *need* to be writable, only check every once in a
1265 	 * while.
1266 	 */
1267 	if (!writing) {
1268 		if ((uint64_t)(t - be->be_lastcheck) <
1269 		    BACKEND_READONLY_CHECK_INTERVAL)
1270 			return (REP_PROTOCOL_SUCCESS);
1271 		be->be_lastcheck = t;
1272 	}
1273 
1274 	/*
1275 	 * It could be that the repository has been moved to non-persistent
1276 	 * storage for performance reasons.  In this case we need to check
1277 	 * the persistent path to see if it is writable.  The
1278 	 * non-persistent path will always be writable.
1279 	 */
1280 	check_path = IS_VOLATILE(be) ? be->be_ppath : be->be_path;
1281 
1282 	new = sqlite_open(check_path, 0600, &errp);
1283 	if (new == NULL) {
1284 		backend_panic("reopening %s: %s\n", check_path, errp);
1285 		/*NOTREACHED*/
1286 	}
1287 	r = backend_is_readonly(new, check_path);
1288 
1289 	if (r != SQLITE_OK) {
1290 		/*
1291 		 * The underlying storage for the permanent repository is
1292 		 * still read-only, so we don't want to change the state or
1293 		 * move the checkpointed backup if it exists.  On the other
1294 		 * hand if the repository has been copied to volatile
1295 		 * storage, we'll let our caller go ahead and write to the
1296 		 * database.
1297 		 */
1298 		sqlite_close(new);
1299 		if (writing && (IS_VOLATILE(be) == 0))
1300 			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
1301 		return (REP_PROTOCOL_SUCCESS);
1302 	}
1303 
1304 	/*
1305 	 * We can write!  If the repository is not on volatile storage,
1306 	 * swap the db handles.  Mark ourself as writable, upgrade the
1307 	 * repository if necessary and make a backup.
1308 	 */
1309 	be->be_readonly = 0;
1310 	flight_recorder_event(BE_FLIGHT_EV_TRANS_RW, BE_FLIGHT_ST_RW);
1311 	if (IS_VOLATILE(be)) {
1312 		/*
1313 		 * If the repository is on volatile storage, don't switch
1314 		 * the handles.  We'll continue to use the repository that
1315 		 * is on tmpfs until we're told to move it back by one of
1316 		 * our clients.  Clients, specifically manifest_import,
1317 		 * move the repository to tmpfs for performance reasons,
1318 		 * and that is the reason to not switch it back until we're
1319 		 * told to do so.
1320 		 */
1321 		flight_recorder_event(BE_FLIGHT_EV_TRANS_RW,
1322 		    BE_FLIGHT_ST_NO_SWITCH);
1323 		sqlite_close(new);
1324 	} else {
1325 		flight_recorder_event(BE_FLIGHT_EV_TRANS_RW,
1326 		    BE_FLIGHT_ST_SWITCH);
1327 		sqlite_close(be->be_db);
1328 		be->be_db = new;
1329 	}
1330 
1331 	if (be->be_type == BACKEND_TYPE_NORMAL)
1332 		backend_check_upgrade(be, B_TRUE);
1333 
1334 	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
1335 	    REP_PROTOCOL_SUCCESS) {
1336 		configd_critical(
1337 		    "unable to create \"%s\" backup of \"%s\"\n",
1338 		    REPOSITORY_BOOT_BACKUP, be->be_path);
1339 	}
1340 
1341 	return (REP_PROTOCOL_SUCCESS);
1342 }
1343 
1344 /*
1345  * If t is not BACKEND_TYPE_NORMAL, can fail with
1346  *   _BACKEND_ACCESS - backend does not exist
1347  *
1348  * If writing is nonzero, can also fail with
1349  *   _BACKEND_READONLY - backend is read-only
1350  */
1351 static int
1352 backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
1353 {
1354 	sqlite_backend_t *be = NULL;
1355 	hrtime_t ts, vts;
1356 
1357 	*bep = NULL;
1358 
1359 	assert(t == BACKEND_TYPE_NORMAL ||
1360 	    t == BACKEND_TYPE_NONPERSIST);
1361 
1362 	be = bes[t];
1363 	if (t == BACKEND_TYPE_NORMAL)
1364 		assert(be != NULL);		/* should always be there */
1365 
1366 	if (be == NULL)
1367 		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
1368 
1369 	if (backend_panic_thread != 0)
1370 		backend_panic(NULL);		/* don't proceed */
1371 
1372 	ts = gethrtime();
1373 	vts = gethrvtime();
1374 	(void) pthread_mutex_lock(&be->be_lock);
1375 	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
1376 
1377 	if (backend_panic_thread != 0) {
1378 		(void) pthread_mutex_unlock(&be->be_lock);
1379 		backend_panic(NULL);		/* don't proceed */
1380 	}
1381 	be->be_thread = pthread_self();
1382 
1383 	if (be->be_readonly) {
1384 		int r;
1385 		assert(t == BACKEND_TYPE_NORMAL);
1386 
1387 		r = backend_check_readonly(be, writing, ts);
1388 		if (r != REP_PROTOCOL_SUCCESS) {
1389 			be->be_thread = 0;
1390 			(void) pthread_mutex_unlock(&be->be_lock);
1391 			return (r);
1392 		}
1393 	}
1394 
1395 	if (backend_do_trace)
1396 		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
1397 	else
1398 		(void) sqlite_trace(be->be_db, NULL, NULL);
1399 
1400 	be->be_writing = writing;
1401 	*bep = be;
1402 	return (REP_PROTOCOL_SUCCESS);
1403 }
1404 
1405 static void
1406 backend_unlock(sqlite_backend_t *be)
1407 {
1408 	be->be_writing = 0;
1409 	be->be_thread = 0;
1410 	(void) pthread_mutex_unlock(&be->be_lock);
1411 }
1412 
1413 static void
1414 backend_destroy(sqlite_backend_t *be)
1415 {
1416 	if (be->be_db != NULL) {
1417 		sqlite_close(be->be_db);
1418 		be->be_db = NULL;
1419 	}
1420 	be->be_thread = 0;
1421 	(void) pthread_mutex_unlock(&be->be_lock);
1422 	(void) pthread_mutex_destroy(&be->be_lock);
1423 }
1424 
1425 static void
1426 backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
1427 {
1428 	assert(MUTEX_HELD(&be->be_lock));
1429 	assert(be == &be_info[backend_id]);
1430 
1431 	bes[backend_id] = be;
1432 	(void) pthread_mutex_unlock(&be->be_lock);
1433 }
1434 
/*
 * Write the whole NUL-terminated string mess to fd, continuing after
 * short writes until every byte has been written.
 *
 * Returns 0 on success and -1 if write(2) fails; errno is left as set by
 * write(2).  A write that is interrupted by a signal (EINTR) is retried
 * rather than reported as a failure.
 */
static int
backend_fd_write(int fd, const char *mess)
{
	/*
	 * Keep the length and the write(2) result in their native types so
	 * that neither is truncated by a conversion to int.
	 */
	size_t remaining = strlen(mess);
	ssize_t written;

	while (remaining > 0) {
		written = write(fd, mess, remaining);
		if (written < 0) {
			if (errno == EINTR)
				continue;	/* interrupted; try again */
			return (-1);
		}
		mess += written;
		remaining -= (size_t)written;
	}
	return (0);
}
1449 
1450 /*
1451  * Can return:
1452  *	_BAD_REQUEST		name is not valid
1453  *	_TRUNCATED		name is too long for current repository path
1454  *	_UNKNOWN		failed for unknown reason (details written to
1455  *				console)
1456  *	_BACKEND_READONLY	backend is not writable
1457  *	_NO_RESOURCES		out of memory
1458  *	_SUCCESS		Backup completed successfully.
1459  */
1460 rep_protocol_responseid_t
1461 backend_create_backup(const char *name)
1462 {
1463 	rep_protocol_responseid_t result;
1464 	sqlite_backend_t *be;
1465 
1466 	flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_CLIENT);
1467 	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
1468 	assert(result == REP_PROTOCOL_SUCCESS);
1469 
1470 	result = backend_create_backup_locked(be, name);
1471 	backend_unlock(be);
1472 
1473 	return (result);
1474 }
1475 
1476 /*
1477  * This function makes a copy of the repository at src, placing the copy at
1478  * dst.  It is used to copy a repository on permanent storage to volatile
1479  * storage or vice versa.  If the source file is on volatile storage, it is
1480  * often times desirable to delete it after the copy has been made and
1481  * verified.  To remove the source repository, set remove_src to 1.
1482  *
1483  * Can return:
1484  *
1485  *	REP_PROTOCOL_SUCCESS		successful copy and rename
1486  *	REP_PROTOCOL_FAIL_UNKNOWN	file operation error
1487  *	REP_PROTOCOL_FAIL_NO_RESOURCES	out of memory
1488  */
1489 static rep_protocol_responseid_t
1490 backend_copy_repository(const char *src, const char *dst, int remove_src)
1491 {
1492 	int srcfd, dstfd;
1493 	char *tmppath = malloc(PATH_MAX);
1494 	rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
1495 	struct stat s_buf;
1496 	size_t cpsz, sz;
1497 
1498 	if (tmppath == NULL) {
1499 		res = REP_PROTOCOL_FAIL_NO_RESOURCES;
1500 		goto out;
1501 	}
1502 
1503 	/*
1504 	 * Create and open the related db files
1505 	 */
1506 	(void) strlcpy(tmppath, dst, PATH_MAX);
1507 	sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
1508 	assert(sz < PATH_MAX);
1509 	if (sz >= PATH_MAX) {
1510 		configd_critical(
1511 		    "Backend copy failed: strlcat %s: overflow\n", tmppath);
1512 		abort();
1513 	}
1514 
1515 	if ((dstfd = mkstemp(tmppath)) < 0) {
1516 		configd_critical("Backend copy failed: mkstemp %s: %s\n",
1517 		    tmppath, strerror(errno));
1518 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1519 		goto out;
1520 	}
1521 
1522 	if ((srcfd = open(src, O_RDONLY)) < 0) {
1523 		configd_critical("Backend copy failed: opening %s: %s\n",
1524 		    src, strerror(errno));
1525 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1526 		goto errexit;
1527 	}
1528 
1529 	/*
1530 	 * fstat the backend before copy for sanity check.
1531 	 */
1532 	if (fstat(srcfd, &s_buf) < 0) {
1533 		configd_critical("Backend copy failed: fstat %s: %s\n",
1534 		    src, strerror(errno));
1535 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1536 		goto errexit;
1537 	}
1538 
1539 	if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
1540 	    REP_PROTOCOL_SUCCESS)
1541 		goto errexit;
1542 
1543 	if (cpsz != s_buf.st_size) {
1544 		configd_critical("Backend copy failed: incomplete copy\n");
1545 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1546 		goto errexit;
1547 	}
1548 
1549 	/*
1550 	 * Rename tmppath to dst
1551 	 */
1552 	if (rename(tmppath, dst) < 0) {
1553 		configd_critical(
1554 		    "Backend copy failed: rename %s to %s: %s\n",
1555 		    tmppath, dst, strerror(errno));
1556 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1557 	}
1558 
1559 errexit:
1560 	if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
1561 		configd_critical(
1562 		    "Backend copy failed: remove %s: %s\n",
1563 		    tmppath, strerror(errno));
1564 
1565 	(void) close(srcfd);
1566 	(void) close(dstfd);
1567 
1568 out:
1569 	free(tmppath);
1570 	if (remove_src) {
1571 		if (unlink(src) < 0)
1572 			configd_critical(
1573 			    "Backend copy failed: remove %s: %s\n",
1574 			    src, strerror(errno));
1575 	}
1576 
1577 	return (res);
1578 }
1579 
1580 /*
1581  * Perform sanity check on the repository.
1582  * Return 0 if check succeeds or -1 if fails.
1583  */
1584 static int
1585 backend_switch_check(struct sqlite *be_db, char **errp)
1586 {
1587 	struct run_single_int_info info;
1588 	uint32_t val = -1UL;
1589 	int r;
1590 
1591 	info.rs_out = &val;
1592 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1593 
1594 	r = sqlite_exec(be_db,
1595 	    "SELECT schema_version FROM schema_version;",
1596 	    run_single_int_callback, &info, errp);
1597 
1598 	if (r == SQLITE_OK &&
1599 	    info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
1600 	    val == BACKEND_SCHEMA_VERSION)
1601 		return (0);
1602 	else
1603 		return (-1);
1604 }
1605 
1606 /*
1607  * backend_switch() implements the REP_PROTOCOL_SWITCH request from
1608  * clients.  First, it blocks all other clients from accessing the
1609  * repository by calling backend_lock to lock the repository.  It either
1610  * copies the repository from it's permanent storage location
1611  * (REPOSITORY_DB) to its fast volatile location (FAST_REPOSITORY_DB), or
1612  * vice versa.  dir determines the direction of the copy.
1613  *
1614  *	dir = 0	Copy from permanent location to volatile location.
1615  *	dir = 1	Copy from volatile location to permanent location.
1616  *
1617  * Can return:
1618  *	REP_PROTOCOL_SUCCESS			successful switch
1619  *	REP_PROTOCOL_FAIL_BACKEND_ACCESS	backen access fails
1620  *	REP_PROTOCOL_FAIL_BACKEND_READONLY	backend is not writable
1621  *	REP_PROTOCOL_FAIL_UNKNOWN		file operation error
1622  *	REP_PROTOCOL_FAIL_NO_RESOURCES		out of memory
1623  */
1624 rep_protocol_responseid_t
1625 backend_switch(int dir)
1626 {
1627 	rep_protocol_responseid_t result;
1628 	sqlite_backend_t *be;
1629 	struct sqlite *new;
1630 	char *errp;
1631 	const char *dst;
1632 
1633 	flight_recorder_event(BE_FLIGHT_EV_SWITCH, BE_FLIGHT_ST_CLIENT);
1634 
1635 	/*
1636 	 * If switching back to the main repository, lock for writing.
1637 	 * Otherwise, lock for reading.
1638 	 */
1639 	result = backend_lock(BACKEND_TYPE_NORMAL, dir ? 1 : 0,
1640 	    &be);
1641 	if (result != REP_PROTOCOL_SUCCESS)
1642 		return (result);
1643 
1644 	if (dir) {
1645 		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1646 		    BE_FLIGHT_ST_PERMANENT);
1647 		dst = REPOSITORY_DB;
1648 	} else {
1649 		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1650 		    BE_FLIGHT_ST_FAST);
1651 		dst = FAST_REPOSITORY_DB;
1652 	}
1653 
1654 	/*
1655 	 * Do the actual copy and rename
1656 	 */
1657 	if (strcmp(be->be_path, dst) == 0) {
1658 		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1659 		    BE_FLIGHT_ST_DUPLICATE);
1660 		result = REP_PROTOCOL_SUCCESS;
1661 		goto errout;
1662 	}
1663 
1664 	result = backend_copy_repository(be->be_path, dst, dir);
1665 	if (result != REP_PROTOCOL_SUCCESS) {
1666 		goto errout;
1667 	}
1668 
1669 	/*
1670 	 * Do the backend sanity check and switch
1671 	 */
1672 	new = sqlite_open(dst, 0600, &errp);
1673 	if (new != NULL) {
1674 		/*
1675 		 * Sanity check
1676 		 */
1677 		if (backend_switch_check(new, &errp) == 0) {
1678 			free((char *)be->be_path);
1679 			be->be_path = strdup(dst);
1680 			if (be->be_path == NULL) {
1681 				configd_critical(
1682 				    "Backend switch failed: strdup %s: %s\n",
1683 				    dst, strerror(errno));
1684 				result = REP_PROTOCOL_FAIL_NO_RESOURCES;
1685 				sqlite_close(new);
1686 			} else {
1687 				sqlite_close(be->be_db);
1688 				be->be_db = new;
1689 				if (dir) {
1690 					/* We're back on permanent storage. */
1691 					be->be_ppath = NULL;
1692 				} else {
1693 					/*
1694 					 * Repository is now on volatile
1695 					 * storage.  Save the location of
1696 					 * the persistent repository.
1697 					 */
1698 					be->be_ppath = REPOSITORY_DB;
1699 				}
1700 			}
1701 		} else {
1702 			configd_critical(
1703 			    "Backend switch failed: integrity check %s: %s\n",
1704 			    dst, errp);
1705 			result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1706 		}
1707 	} else {
1708 		configd_critical("Backend switch failed: sqlite_open %s: %s\n",
1709 		    dst, errp);
1710 		result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1711 	}
1712 
1713 errout:
1714 	if (result == REP_PROTOCOL_SUCCESS) {
1715 		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1716 		    BE_FLIGHT_ST_SUCCESS);
1717 	} else {
1718 		flight_recorder_event(BE_FLIGHT_EV_SWITCH, BE_FLIGHT_ST_FAIL);
1719 	}
1720 	backend_unlock(be);
1721 	return (result);
1722 }
1723 
1724 /*
1725  * This routine is called to attempt the recovery of
1726  * the most recent valid repository if possible when configd
1727  * is restarted for some reasons or when system crashes
1728  * during the switch operation.  The repository databases
1729  * referenced here are indicators of successful switch
1730  * operations.
1731  */
1732 static backend_switch_results_t
1733 backend_switch_recovery(void)
1734 {
1735 	const char *fast_db = FAST_REPOSITORY_DB;
1736 	char *errp = NULL;
1737 	struct stat s_buf;
1738 	struct sqlite *be_db;
1739 	int r;
1740 	backend_switch_results_t res = BACKEND_SWITCH_OK;
1741 
1742 	/*
1743 	 * A good transient db containing most recent data can
1744 	 * exist if svc.configd crashes during the
1745 	 * switch operation.  If that is the case, check its
1746 	 * integrity and use it.
1747 	 */
1748 	if (stat(fast_db, &s_buf) < 0) {
1749 		return (BACKEND_SWITCH_OK);
1750 	}
1751 
1752 	/* Determine if persistent repository is read-only */
1753 	be_db = sqlite_open(REPOSITORY_DB, 0600, &errp);
1754 	if (be_db == NULL) {
1755 		configd_critical("Unable to open \"%s\".  %s\n",
1756 		    REPOSITORY_DB, errp == NULL ? "" : errp);
1757 		free(errp);
1758 		return (BACKEND_SWITCH_FATAL);
1759 	}
1760 	r = backend_is_readonly(be_db, REPOSITORY_DB);
1761 	sqlite_close(be_db);
1762 	if (r != SQLITE_OK) {
1763 		if (r == SQLITE_READONLY) {
1764 			return (BACKEND_SWITCH_RO);
1765 		}
1766 		return (BACKEND_SWITCH_FATAL);
1767 	}
1768 
1769 	/*
1770 	 * Do sanity check on the db
1771 	 */
1772 	be_db = sqlite_open(fast_db, 0600, &errp);
1773 
1774 	if (be_db != NULL) {
1775 		if (backend_switch_check(be_db, &errp) == 0) {
1776 			if (backend_copy_repository(fast_db,
1777 			    REPOSITORY_DB, 1) != REP_PROTOCOL_SUCCESS) {
1778 				res = BACKEND_SWITCH_FATAL;
1779 			}
1780 		}
1781 		sqlite_close(be_db);
1782 	}
1783 	free(errp);
1784 
1785 	/*
1786 	 * If we get to this point, the fast_db has either been copied or
1787 	 * it is useless.  Either way, get rid of it.
1788 	 */
1789 	(void) unlink(fast_db);
1790 
1791 	return (res);
1792 }
1793 
1794 /*ARGSUSED*/
1795 static int
1796 backend_integrity_callback(void *private, int narg, char **vals, char **cols)
1797 {
1798 	char **out = private;
1799 	char *old = *out;
1800 	char *new;
1801 	const char *info;
1802 	size_t len;
1803 	int x;
1804 
1805 	for (x = 0; x < narg; x++) {
1806 		if ((info = vals[x]) != NULL &&
1807 		    strcmp(info, "ok") != 0) {
1808 			len = (old == NULL)? 0 : strlen(old);
1809 			len += strlen(info) + 2;	/* '\n' + '\0' */
1810 
1811 			new = realloc(old, len);
1812 			if (new == NULL)
1813 				return (BACKEND_CALLBACK_ABORT);
1814 			if (old == NULL)
1815 				new[0] = 0;
1816 			old = *out = new;
1817 			(void) strlcat(new, info, len);
1818 			(void) strlcat(new, "\n", len);
1819 		}
1820 	}
1821 	return (BACKEND_CALLBACK_CONTINUE);
1822 }
1823 
1824 #define	BACKEND_CREATE_LOCKED		-2
1825 #define	BACKEND_CREATE_FAIL		-1
1826 #define	BACKEND_CREATE_SUCCESS		0
1827 #define	BACKEND_CREATE_READONLY		1
1828 #define	BACKEND_CREATE_NEED_INIT	2
1829 static int
1830 backend_create(backend_type_t backend_id, const char *db_file,
1831     sqlite_backend_t **bep)
1832 {
1833 	char *errp;
1834 	char *integrity_results = NULL;
1835 	sqlite_backend_t *be;
1836 	int r;
1837 	uint32_t val = -1UL;
1838 	struct run_single_int_info info;
1839 	int fd;
1840 
1841 	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);
1842 
1843 	be = &be_info[backend_id];
1844 
1845 	assert(be->be_db == NULL);
1846 
1847 	(void) pthread_mutex_init(&be->be_lock, NULL);
1848 	(void) pthread_mutex_lock(&be->be_lock);
1849 
1850 	be->be_type = backend_id;
1851 	be->be_path = strdup(db_file);
1852 	if (be->be_path == NULL) {
1853 		perror("malloc");
1854 		goto fail;
1855 	}
1856 
1857 	be->be_db = sqlite_open(be->be_path, 0600, &errp);
1858 
1859 	if (be->be_db == NULL) {
1860 		if (strstr(errp, "out of memory") != NULL) {
1861 			configd_critical("%s: %s\n", db_file, errp);
1862 			free(errp);
1863 
1864 			goto fail;
1865 		}
1866 
1867 		/* report it as an integrity failure */
1868 		integrity_results = errp;
1869 		errp = NULL;
1870 		goto integrity_fail;
1871 	}
1872 
1873 	/*
1874 	 * check if we are inited and of the correct schema version
1875 	 *
1876 	 */
1877 	info.rs_out = &val;
1878 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1879 
1880 	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
1881 	    run_single_int_callback, &info, &errp);
1882 	if (r == SQLITE_ERROR &&
1883 	    strcmp("no such table: schema_version", errp) == 0) {
1884 		free(errp);
1885 		/*
1886 		 * Could be an empty repository, could be pre-schema_version
1887 		 * schema.  Check for id_tbl, which has always been there.
1888 		 */
1889 		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
1890 		    NULL, NULL, &errp);
1891 		if (r == SQLITE_ERROR &&
1892 		    strcmp("no such table: id_tbl", errp) == 0) {
1893 			free(errp);
1894 			*bep = be;
1895 			return (BACKEND_CREATE_NEED_INIT);
1896 		}
1897 
1898 		configd_critical("%s: schema version mismatch\n", db_file);
1899 		goto fail;
1900 	}
1901 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1902 		free(errp);
1903 		*bep = NULL;
1904 		backend_destroy(be);
1905 		return (BACKEND_CREATE_LOCKED);
1906 	}
1907 	if (r == SQLITE_OK) {
1908 		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
1909 		    val != BACKEND_SCHEMA_VERSION) {
1910 			configd_critical("%s: schema version mismatch\n",
1911 			    db_file);
1912 			goto fail;
1913 		}
1914 	}
1915 
1916 	/*
1917 	 * pull in the whole database sequentially.
1918 	 */
1919 	if ((fd = open(db_file, O_RDONLY)) >= 0) {
1920 		size_t sz = 64 * 1024;
1921 		char *buffer = malloc(sz);
1922 		if (buffer != NULL) {
1923 			while (read(fd, buffer, sz) > 0)
1924 				;
1925 			free(buffer);
1926 		}
1927 		(void) close(fd);
1928 	}
1929 
1930 	/*
1931 	 * run an integrity check
1932 	 */
1933 	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
1934 	    backend_integrity_callback, &integrity_results, &errp);
1935 
1936 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1937 		free(errp);
1938 		*bep = NULL;
1939 		backend_destroy(be);
1940 		return (BACKEND_CREATE_LOCKED);
1941 	}
1942 	if (r == SQLITE_ABORT) {
1943 		free(errp);
1944 		errp = NULL;
1945 		integrity_results = "out of memory running integrity check\n";
1946 	} else if (r != SQLITE_OK && integrity_results == NULL) {
1947 		integrity_results = errp;
1948 		errp = NULL;
1949 	}
1950 
1951 integrity_fail:
1952 	if (integrity_results != NULL) {
1953 		const char *fname = "/etc/svc/volatile/db_errors";
1954 		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
1955 			fname = NULL;
1956 		} else {
1957 			if (backend_fd_write(fd, "\n\n") < 0 ||
1958 			    backend_fd_write(fd, db_file) < 0 ||
1959 			    backend_fd_write(fd,
1960 			    ": PRAGMA integrity_check; failed.  Results:\n") <
1961 			    0 || backend_fd_write(fd, integrity_results) < 0 ||
1962 			    backend_fd_write(fd, "\n\n") < 0) {
1963 				fname = NULL;
1964 			}
1965 			(void) close(fd);
1966 		}
1967 
1968 		if (!is_main_repository ||
1969 		    backend_id == BACKEND_TYPE_NONPERSIST) {
1970 			if (fname != NULL)
1971 				configd_critical(
1972 				    "%s: integrity check failed. Details in "
1973 				    "%s\n", db_file, fname);
1974 			else
1975 				configd_critical(
1976 				    "%s: integrity check failed.\n",
1977 				    db_file);
1978 		} else {
1979 			(void) fprintf(stderr,
1980 "\n"
1981 "svc.configd: smf(5) database integrity check of:\n"
1982 "\n"
1983 "    %s\n"
1984 "\n"
1985 "  failed. The database might be damaged or a media error might have\n"
1986 "  prevented it from being verified.  Additional information useful to\n"
1987 "  your service provider%s%s\n"
1988 "\n"
1989 "  The system will not be able to boot until you have restored a working\n"
1990 "  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
1991 "  purposes.  The command:\n"
1992 "\n"
1993 "    /lib/svc/bin/restore_repository\n"
1994 "\n"
1995 "  can be run to restore a backup version of your repository.  See\n"
1996 "  http://illumos.org/msg/SMF-8000-MY for more information.\n"
1997 "\n",
1998 			    db_file,
1999 			    (fname == NULL)? ":\n\n" : " is in:\n\n    ",
2000 			    (fname == NULL)? integrity_results : fname);
2001 		}
2002 		free(errp);
2003 		goto fail;
2004 	}
2005 
2006 	/*
2007 	 * Simply do check if backend has been upgraded.  We do not wish
2008 	 * to actually carry out upgrade here - the main repository may
2009 	 * not be writable at this point.  Actual upgrade is carried out
2010 	 * via backend_check_readonly().  This check is done so that
2011 	 * we determine repository state - upgraded or not - and then
2012 	 * the appropriate SELECT statement (value-ordered or not)
2013 	 * can be used when retrieving property values early in boot.
2014 	 */
2015 	if (backend_id == BACKEND_TYPE_NORMAL)
2016 		backend_check_upgrade(be, B_FALSE);
2017 	/*
2018 	 * check if we are writable
2019 	 */
2020 	r = backend_is_readonly(be->be_db, be->be_path);
2021 
2022 	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
2023 		free(errp);
2024 		*bep = NULL;
2025 		backend_destroy(be);
2026 		return (BACKEND_CREATE_LOCKED);
2027 	}
2028 	if (r != SQLITE_OK && r != SQLITE_FULL) {
2029 		free(errp);
2030 		be->be_readonly = 1;
2031 		*bep = be;
2032 		return (BACKEND_CREATE_READONLY);
2033 	}
2034 
2035 	*bep = be;
2036 	return (BACKEND_CREATE_SUCCESS);
2037 
2038 fail:
2039 	*bep = NULL;
2040 	backend_destroy(be);
2041 	return (BACKEND_CREATE_FAIL);
2042 }
2043 
/*
 * Round arg up to the nearest power of two; a value that already is a
 * power of two is returned unchanged.
 *
 * (arg & -arg) is, through the magic of twos-complement arithmetic, the
 * lowest set bit in arg.  Adding that bit to arg repeatedly carries the
 * low bits upwards until only a single bit remains set.
 */
static size_t
round_up_to_p2(size_t arg)
{
	size_t lowbit;

	/*
	 * Don't allow a zero result.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		lowbit = arg & -arg;
		if (lowbit == arg)
			break;		/* exactly one bit is set */
		arg += lowbit;
	}

	return (arg);
}
2061 
2062 /*
2063  * Returns
2064  *   _NO_RESOURCES - out of memory
2065  *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
2066  *   _DONE - callback aborted query
2067  *   _SUCCESS
2068  */
2069 int
2070 backend_run(backend_type_t t, backend_query_t *q,
2071     backend_run_callback_f *cb, void *data)
2072 {
2073 	char *errmsg = NULL;
2074 	int ret;
2075 	sqlite_backend_t *be;
2076 	hrtime_t ts, vts;
2077 
2078 	if (q == NULL || q->bq_buf == NULL)
2079 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2080 
2081 	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
2082 		return (ret);
2083 
2084 	ts = gethrtime();
2085 	vts = gethrvtime();
2086 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2087 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2088 	ret = backend_error(be, ret, errmsg);
2089 	backend_unlock(be);
2090 
2091 	return (ret);
2092 }
2093 
2094 /*
2095  * Starts a "read-only" transaction -- i.e., locks out writers as long
2096  * as it is active.
2097  *
2098  * Fails with
2099  *   _NO_RESOURCES - out of memory
2100  *
2101  * If t is not _NORMAL, can also fail with
2102  *   _BACKEND_ACCESS - backend does not exist
2103  *
2104  * If writable is true, can also fail with
2105  *   _BACKEND_READONLY
2106  */
2107 static int
2108 backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
2109 {
2110 	backend_tx_t *ret;
2111 	sqlite_backend_t *be;
2112 	int r;
2113 
2114 	*txp = NULL;
2115 
2116 	ret = uu_zalloc(sizeof (*ret));
2117 	if (ret == NULL)
2118 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2119 
2120 	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
2121 		uu_free(ret);
2122 		return (r);
2123 	}
2124 
2125 	ret->bt_be = be;
2126 	ret->bt_readonly = !writable;
2127 	ret->bt_type = t;
2128 	ret->bt_full = 0;
2129 
2130 	*txp = ret;
2131 	return (REP_PROTOCOL_SUCCESS);
2132 }
2133 
2134 int
2135 backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
2136 {
2137 	return (backend_tx_begin_common(t, txp, 0));
2138 }
2139 
2140 static void
2141 backend_tx_end(backend_tx_t *tx)
2142 {
2143 	sqlite_backend_t *be;
2144 
2145 	be = tx->bt_be;
2146 
2147 	if (tx->bt_full) {
2148 		struct sqlite *new;
2149 
2150 		/*
2151 		 * sqlite tends to be sticky with SQLITE_FULL, so we try
2152 		 * to get a fresh database handle if we got a FULL warning
2153 		 * along the way.  If that fails, no harm done.
2154 		 */
2155 		new = sqlite_open(be->be_path, 0600, NULL);
2156 		if (new != NULL) {
2157 			sqlite_close(be->be_db);
2158 			be->be_db = new;
2159 		}
2160 	}
2161 	backend_unlock(be);
2162 	tx->bt_be = NULL;
2163 	uu_free(tx);
2164 }
2165 
2166 void
2167 backend_tx_end_ro(backend_tx_t *tx)
2168 {
2169 	assert(tx->bt_readonly);
2170 	backend_tx_end(tx);
2171 }
2172 
2173 /*
2174  * Fails with
2175  *   _NO_RESOURCES - out of memory
2176  *   _BACKEND_ACCESS
2177  *   _BACKEND_READONLY
2178  */
2179 int
2180 backend_tx_begin(backend_type_t t, backend_tx_t **txp)
2181 {
2182 	int r;
2183 	char *errmsg;
2184 	hrtime_t ts, vts;
2185 
2186 	r = backend_tx_begin_common(t, txp, 1);
2187 	if (r != REP_PROTOCOL_SUCCESS)
2188 		return (r);
2189 
2190 	ts = gethrtime();
2191 	vts = gethrvtime();
2192 	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
2193 	    &errmsg);
2194 	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
2195 	if (r == SQLITE_FULL)
2196 		(*txp)->bt_full = 1;
2197 	r = backend_error((*txp)->bt_be, r, errmsg);
2198 
2199 	if (r != REP_PROTOCOL_SUCCESS) {
2200 		assert(r != REP_PROTOCOL_DONE);
2201 		(void) sqlite_exec((*txp)->bt_be->be_db,
2202 		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
2203 		backend_tx_end(*txp);
2204 		*txp = NULL;
2205 		return (r);
2206 	}
2207 
2208 	(*txp)->bt_readonly = 0;
2209 
2210 	return (REP_PROTOCOL_SUCCESS);
2211 }
2212 
2213 void
2214 backend_tx_rollback(backend_tx_t *tx)
2215 {
2216 	int r;
2217 	char *errmsg;
2218 	sqlite_backend_t *be;
2219 	hrtime_t ts, vts;
2220 
2221 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2222 	be = tx->bt_be;
2223 
2224 	ts = gethrtime();
2225 	vts = gethrvtime();
2226 	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
2227 	    &errmsg);
2228 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2229 	if (r == SQLITE_FULL)
2230 		tx->bt_full = 1;
2231 	(void) backend_error(be, r, errmsg);
2232 
2233 	backend_tx_end(tx);
2234 }
2235 
2236 /*
2237  * Fails with
2238  *   _NO_RESOURCES - out of memory
2239  */
2240 int
2241 backend_tx_commit(backend_tx_t *tx)
2242 {
2243 	int r, r2;
2244 	char *errmsg;
2245 	sqlite_backend_t *be;
2246 	hrtime_t ts, vts;
2247 
2248 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2249 	be = tx->bt_be;
2250 	ts = gethrtime();
2251 	vts = gethrvtime();
2252 	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
2253 	    &errmsg);
2254 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2255 	if (r == SQLITE_FULL)
2256 		tx->bt_full = 1;
2257 
2258 	r = backend_error(be, r, errmsg);
2259 	assert(r != REP_PROTOCOL_DONE);
2260 
2261 	if (r != REP_PROTOCOL_SUCCESS) {
2262 		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
2263 		    &errmsg);
2264 		r2 = backend_error(be, r2, errmsg);
2265 		if (r2 != REP_PROTOCOL_SUCCESS)
2266 			backend_panic("cannot rollback failed commit");
2267 
2268 		backend_tx_end(tx);
2269 		return (r);
2270 	}
2271 	backend_tx_end(tx);
2272 	return (REP_PROTOCOL_SUCCESS);
2273 }
2274 
2275 static const char *
2276 id_space_to_name(enum id_space id)
2277 {
2278 	switch (id) {
2279 	case BACKEND_ID_SERVICE_INSTANCE:
2280 		return ("SI");
2281 	case BACKEND_ID_PROPERTYGRP:
2282 		return ("PG");
2283 	case BACKEND_ID_GENERATION:
2284 		return ("GEN");
2285 	case BACKEND_ID_PROPERTY:
2286 		return ("PROP");
2287 	case BACKEND_ID_VALUE:
2288 		return ("VAL");
2289 	case BACKEND_ID_SNAPNAME:
2290 		return ("SNAME");
2291 	case BACKEND_ID_SNAPSHOT:
2292 		return ("SHOT");
2293 	case BACKEND_ID_SNAPLEVEL:
2294 		return ("SLVL");
2295 	default:
2296 		abort();
2297 		/*NOTREACHED*/
2298 	}
2299 }
2300 
2301 /*
2302  * Returns a new id or 0 if the id argument is invalid or the query fails.
2303  */
2304 uint32_t
2305 backend_new_id(backend_tx_t *tx, enum id_space id)
2306 {
2307 	struct run_single_int_info info;
2308 	uint32_t new_id = 0;
2309 	const char *name = id_space_to_name(id);
2310 	char *errmsg;
2311 	int ret;
2312 	sqlite_backend_t *be;
2313 	hrtime_t ts, vts;
2314 
2315 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2316 	be = tx->bt_be;
2317 
2318 	info.rs_out = &new_id;
2319 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2320 
2321 	ts = gethrtime();
2322 	vts = gethrvtime();
2323 	ret = sqlite_exec_printf(be->be_db,
2324 	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
2325 	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
2326 	    run_single_int_callback, &info, &errmsg, name, name);
2327 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2328 	if (ret == SQLITE_FULL)
2329 		tx->bt_full = 1;
2330 
2331 	ret = backend_error(be, ret, errmsg);
2332 
2333 	if (ret != REP_PROTOCOL_SUCCESS) {
2334 		return (0);
2335 	}
2336 
2337 	return (new_id);
2338 }
2339 
2340 /*
2341  * Returns
2342  *   _NO_RESOURCES - out of memory
2343  *   _DONE - callback aborted query
2344  *   _SUCCESS
2345  */
2346 int
2347 backend_tx_run(backend_tx_t *tx, backend_query_t *q,
2348     backend_run_callback_f *cb, void *data)
2349 {
2350 	char *errmsg = NULL;
2351 	int ret;
2352 	sqlite_backend_t *be;
2353 	hrtime_t ts, vts;
2354 
2355 	assert(tx != NULL && tx->bt_be != NULL);
2356 	be = tx->bt_be;
2357 
2358 	if (q == NULL || q->bq_buf == NULL)
2359 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2360 
2361 	ts = gethrtime();
2362 	vts = gethrvtime();
2363 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2364 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2365 	if (ret == SQLITE_FULL)
2366 		tx->bt_full = 1;
2367 	ret = backend_error(be, ret, errmsg);
2368 
2369 	return (ret);
2370 }
2371 
2372 /*
2373  * Returns
2374  *   _NO_RESOURCES - out of memory
2375  *   _NOT_FOUND - the query returned no results
2376  *   _SUCCESS - the query returned a single integer
2377  */
2378 int
2379 backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
2380 {
2381 	struct run_single_int_info info;
2382 	int ret;
2383 
2384 	info.rs_out = buf;
2385 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2386 
2387 	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
2388 	assert(ret != REP_PROTOCOL_DONE);
2389 
2390 	if (ret != REP_PROTOCOL_SUCCESS)
2391 		return (ret);
2392 
2393 	return (info.rs_result);
2394 }
2395 
2396 /*
2397  * Fails with
2398  *   _NO_RESOURCES - out of memory
2399  */
2400 int
2401 backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
2402 {
2403 	va_list a;
2404 	char *errmsg;
2405 	int ret;
2406 	sqlite_backend_t *be;
2407 	hrtime_t ts, vts;
2408 
2409 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2410 	be = tx->bt_be;
2411 
2412 	va_start(a, format);
2413 	ts = gethrtime();
2414 	vts = gethrvtime();
2415 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2416 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2417 	if (ret == SQLITE_FULL)
2418 		tx->bt_full = 1;
2419 	va_end(a);
2420 	ret = backend_error(be, ret, errmsg);
2421 	assert(ret != REP_PROTOCOL_DONE);
2422 
2423 	return (ret);
2424 }
2425 
2426 /*
2427  * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
2428  */
2429 int
2430 backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
2431 {
2432 	va_list a;
2433 	char *errmsg;
2434 	int ret;
2435 	sqlite_backend_t *be;
2436 	hrtime_t ts, vts;
2437 
2438 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2439 	be = tx->bt_be;
2440 
2441 	va_start(a, format);
2442 	ts = gethrtime();
2443 	vts = gethrvtime();
2444 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2445 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2446 	if (ret == SQLITE_FULL)
2447 		tx->bt_full = 1;
2448 	va_end(a);
2449 
2450 	ret = backend_error(be, ret, errmsg);
2451 
2452 	return (ret);
2453 }
2454 
2455 #define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
2456 	(backend_add_schema((be), (file), \
2457 	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
2458 	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
2459 
2460 static int
2461 backend_add_schema(sqlite_backend_t *be, const char *file,
2462     struct backend_tbl_info *tbls, int tbl_count,
2463     struct backend_idx_info *idxs, int idx_count)
2464 {
2465 	int i;
2466 	char *errmsg;
2467 	int ret;
2468 
2469 	/*
2470 	 * Create the tables.
2471 	 */
2472 	for (i = 0; i < tbl_count; i++) {
2473 		if (tbls[i].bti_name == NULL) {
2474 			assert(i + 1 == tbl_count);
2475 			break;
2476 		}
2477 		ret = sqlite_exec_printf(be->be_db,
2478 		    "CREATE TABLE %s (%s);\n",
2479 		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
2480 
2481 		if (ret != SQLITE_OK) {
2482 			configd_critical(
2483 			    "%s: %s table creation fails: %s\n", file,
2484 			    tbls[i].bti_name, errmsg);
2485 			free(errmsg);
2486 			return (-1);
2487 		}
2488 	}
2489 
2490 	/*
2491 	 * Make indices on key tables and columns.
2492 	 */
2493 	for (i = 0; i < idx_count; i++) {
2494 		if (idxs[i].bxi_tbl == NULL) {
2495 			assert(i + 1 == idx_count);
2496 			break;
2497 		}
2498 
2499 		ret = sqlite_exec_printf(be->be_db,
2500 		    "CREATE INDEX %s_%s ON %s (%s);\n",
2501 		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
2502 		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
2503 
2504 		if (ret != SQLITE_OK) {
2505 			configd_critical(
2506 			    "%s: %s_%s index creation fails: %s\n", file,
2507 			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
2508 			free(errmsg);
2509 			return (-1);
2510 		}
2511 	}
2512 	return (0);
2513 }
2514 
2515 static int
2516 backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
2517 {
2518 	int i;
2519 	char *errmsg;
2520 	int ret;
2521 
2522 	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
2523 
2524 	if (t == BACKEND_TYPE_NORMAL) {
2525 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
2526 	} else if (t == BACKEND_TYPE_NONPERSIST) {
2527 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
2528 	} else {
2529 		abort();		/* can't happen */
2530 	}
2531 
2532 	if (ret < 0) {
2533 		return (ret);
2534 	}
2535 
2536 	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
2537 	if (ret < 0) {
2538 		return (ret);
2539 	}
2540 
2541 	/*
2542 	 * Add the schema version to the table
2543 	 */
2544 	ret = sqlite_exec_printf(be->be_db,
2545 	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
2546 	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
2547 	if (ret != SQLITE_OK) {
2548 		configd_critical(
2549 		    "setting schema version fails: %s\n", errmsg);
2550 		free(errmsg);
2551 	}
2552 
2553 	/*
2554 	 * Populate id_tbl with initial IDs.
2555 	 */
2556 	for (i = 0; i < BACKEND_ID_INVALID; i++) {
2557 		const char *name = id_space_to_name(i);
2558 
2559 		ret = sqlite_exec_printf(be->be_db,
2560 		    "INSERT INTO id_tbl (id_name, id_next) "
2561 		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
2562 		if (ret != SQLITE_OK) {
2563 			configd_critical(
2564 			    "id insertion for %s fails: %s\n", name, errmsg);
2565 			free(errmsg);
2566 			return (-1);
2567 		}
2568 	}
2569 	/*
2570 	 * Set the persistance of the database.  The normal database is marked
2571 	 * "synchronous", so that all writes are synchronized to stable storage
2572 	 * before proceeding.
2573 	 */
2574 	ret = sqlite_exec_printf(be->be_db,
2575 	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
2576 	    NULL, NULL, &errmsg,
2577 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
2578 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
2579 	if (ret != SQLITE_OK) {
2580 		configd_critical("pragma setting fails: %s\n", errmsg);
2581 		free(errmsg);
2582 		return (-1);
2583 	}
2584 
2585 	return (0);
2586 }
2587 
2588 int
2589 backend_init(const char *db_file, const char *npdb_file, int have_np)
2590 {
2591 	sqlite_backend_t *be;
2592 	char *errp;
2593 	struct sqlite *fast_db;
2594 	int r;
2595 	backend_switch_results_t switch_result = BACKEND_SWITCH_OK;
2596 	int writable_persist = 1;
2597 
2598 	/* set up our temporary directory */
2599 	sqlite_temp_directory = "/etc/svc/volatile";
2600 
2601 	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
2602 		configd_critical("Mismatched link!  (%s should be %s)\n",
2603 		    sqlite_version, SQLITE_VERSION);
2604 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2605 	}
2606 
2607 	if (db_file == NULL)
2608 		db_file = REPOSITORY_DB;
2609 	if (strcmp(db_file, REPOSITORY_DB) != 0) {
2610 		is_main_repository = 0;
2611 	}
2612 
2613 	/*
2614 	 * If the svc.configd crashed, there might be a leftover transient
2615 	 * database at FAST_REPOSITORY_DB,which contains useful
2616 	 * information.  Both early manifest import and late manifest
2617 	 * import use svcadm to copy the repository to FAST_REPOSITORY_DB.
2618 	 * One reason for doing this is that it improves the performance of
2619 	 * manifest import.  The other reason is that the repository may be
2620 	 * on read-only root in the case of early manifest import.
2621 	 *
2622 	 * If FAST_REPOSITORY_DB exists, it is an indication that
2623 	 * svc.configd has been restarted for some reason.  Since we have
2624 	 * no way of knowing where we are in the boot process, the safe
2625 	 * thing to do is to move the repository back to it's non-transient
2626 	 * location, REPOSITORY_DB.  This may slow manifest import
2627 	 * performance, but it avoids the problem of missing the command to
2628 	 * move the repository to permanent storage.
2629 	 *
2630 	 * There is a caveat, though.  If root is read-only, we'll need to
2631 	 * leave the repository at FAST_REPOSITORY_DB.  If root is
2632 	 * read-only, late manifest import has not yet run, so it will move
2633 	 * the repository back to permanent storage when it runs.
2634 	 */
2635 	if (is_main_repository)
2636 		switch_result = backend_switch_recovery();
2637 
2638 	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
2639 	switch (r) {
2640 	case BACKEND_CREATE_FAIL:
2641 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2642 	case BACKEND_CREATE_LOCKED:
2643 		return (CONFIGD_EXIT_DATABASE_LOCKED);
2644 	case BACKEND_CREATE_SUCCESS:
2645 		break;		/* success */
2646 	case BACKEND_CREATE_READONLY:
2647 		writable_persist = 0;
2648 		break;
2649 	case BACKEND_CREATE_NEED_INIT:
2650 		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
2651 			backend_destroy(be);
2652 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2653 		}
2654 		break;
2655 	default:
2656 		abort();
2657 		/*NOTREACHED*/
2658 	}
2659 	backend_create_finish(BACKEND_TYPE_NORMAL, be);
2660 	flight_recorder_event(BE_FLIGHT_EV_REPO_CREATE,
2661 	    writable_persist == 1 ? BE_FLIGHT_ST_RW : BE_FLIGHT_ST_RO);
2662 	/*
2663 	 * If there was a transient repository that could not be copied
2664 	 * back because the root file system was read-only, switch over to
2665 	 * using the transient repository.
2666 	 */
2667 	if (switch_result == BACKEND_SWITCH_RO) {
2668 		char *db_name_copy = NULL;
2669 
2670 		fast_db = sqlite_open(FAST_REPOSITORY_DB, 0600, &errp);
2671 		if (fast_db == NULL) {
2672 			/* Can't open fast repository.  Stick with permanent. */
2673 			configd_critical("Cannot open \"%s\".  %s\n",
2674 			    FAST_REPOSITORY_DB, errp == NULL ? "" : errp);
2675 			free(errp);
2676 		} else {
2677 			db_name_copy = strdup(FAST_REPOSITORY_DB);
2678 			if (db_name_copy == NULL) {
2679 				configd_critical("backend_init: out of "
2680 				    "memory.\n");
2681 				sqlite_close(fast_db);
2682 				return (CONFIGD_EXIT_INIT_FAILED);
2683 			} else {
2684 				flight_recorder_event(
2685 				    BE_FLIGHT_EV_LINGERING_FAST,
2686 				    BE_FLIGHT_ST_RO);
2687 				sqlite_close(be->be_db);
2688 				be->be_db = fast_db;
2689 				be->be_ppath = be->be_path;
2690 				be->be_path = db_name_copy;
2691 			}
2692 		}
2693 	}
2694 
2695 	if (have_np) {
2696 		if (npdb_file == NULL)
2697 			npdb_file = NONPERSIST_DB;
2698 
2699 		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
2700 		switch (r) {
2701 		case BACKEND_CREATE_SUCCESS:
2702 			break;		/* success */
2703 		case BACKEND_CREATE_FAIL:
2704 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2705 		case BACKEND_CREATE_LOCKED:
2706 			return (CONFIGD_EXIT_DATABASE_LOCKED);
2707 		case BACKEND_CREATE_READONLY:
2708 			configd_critical("%s: unable to write\n", npdb_file);
2709 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2710 		case BACKEND_CREATE_NEED_INIT:
2711 			if (backend_init_schema(be, db_file,
2712 			    BACKEND_TYPE_NONPERSIST)) {
2713 				backend_destroy(be);
2714 				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2715 			}
2716 			break;
2717 		default:
2718 			abort();
2719 			/*NOTREACHED*/
2720 		}
2721 		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
2722 
2723 		if (r != BACKEND_CREATE_NEED_INIT) {
2724 			flight_recorder_event(BE_FLIGHT_EV_RESTART,
2725 			    BE_FLIGHT_ST_INFO);
2726 		}
2727 
2728 		/*
2729 		 * If we started up with a writable filesystem, but the
2730 		 * non-persistent database needed initialization, we are
2731 		 * booting a non-global zone or a system with a writable
2732 		 * root (ZFS), so do a backup.  Checking to see if the
2733 		 * non-persistent database needed initialization also keeps
2734 		 * us from making additional backups if configd gets
2735 		 * restarted.
2736 		 */
2737 		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
2738 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2739 		    REP_PROTOCOL_SUCCESS) {
2740 			if (backend_create_backup_locked(be,
2741 			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
2742 				configd_critical(
2743 				    "unable to create \"%s\" backup of "
2744 				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2745 				    be->be_path);
2746 			}
2747 			backend_unlock(be);
2748 		}
2749 
2750 		/*
2751 		 * On the other hand if we started with a read-only file
2752 		 * system and the non-persistent database needed
2753 		 * initialization, then we need to take a checkpoint of the
2754 		 * repository.  We grab the checkpoint now before Early
2755 		 * Manifest Import starts modifying the repository.  Then
2756 		 * when the file system becomes writable, the checkpoint
2757 		 * can be used to create the boot time backup of the
2758 		 * repository.  Checking that the non-persistent database
2759 		 * needed initialization, keeps us from making additional
2760 		 * checkpoints if configd gets restarted.
2761 		 */
2762 		if (r == BACKEND_CREATE_NEED_INIT && writable_persist == 0 &&
2763 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2764 		    REP_PROTOCOL_SUCCESS) {
2765 			r = backend_checkpoint_repository(be);
2766 			if (r != REP_PROTOCOL_SUCCESS) {
2767 				configd_critical("unable to create checkpoint "
2768 				    "of \"%s\"\n", be->be_path);
2769 			}
2770 			backend_unlock(be);
2771 		}
2772 
2773 		/*
2774 		 * If the non-persistent database did not need
2775 		 * initialization, svc.configd has been restarted.  See if
2776 		 * the boot time checkpoint exists.  If it does, use it to
2777 		 * make a backup if root is writable.
2778 		 */
2779 		if (r != BACKEND_CREATE_NEED_INIT &&
2780 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2781 		    REP_PROTOCOL_SUCCESS) {
2782 			struct stat sb;
2783 
2784 			if ((stat(REPOSITORY_CHECKPOINT, &sb) == 0) &&
2785 			    (sb.st_size > 0) && (sb.st_mode & S_IFREG)) {
2786 				be->be_checkpoint = REPOSITORY_CHECKPOINT;
2787 				flight_recorder_event(
2788 				    BE_FLIGHT_EV_CHECKPOINT_EXISTS,
2789 				    BE_FLIGHT_ST_INFO);
2790 			}
2791 
2792 			/*
2793 			 * If we have a checkpoint and root is writable,
2794 			 * make the backup now.
2795 			 */
2796 			if (be->be_checkpoint && writable_persist) {
2797 				if (backend_create_backup_locked(be,
2798 				    REPOSITORY_BOOT_BACKUP) !=
2799 				    REP_PROTOCOL_SUCCESS) {
2800 					configd_critical(
2801 					    "unable to create \"%s\" backup of "
2802 					    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2803 					    be->be_path);
2804 				}
2805 			}
2806 			backend_unlock(be);
2807 		}
2808 	}
2809 
2810 	/*
2811 	 * If the persistent backend is writable at this point, upgrade it.
2812 	 * This can occur in a few cases, most notably on UFS roots if
2813 	 * we are operating on the backend from another root, as is the case
2814 	 * during alternate-root BFU.
2815 	 *
2816 	 * Otherwise, upgrade will occur via backend_check_readonly() when
2817 	 * the repository is re-opened read-write.
2818 	 */
2819 	if (writable_persist) {
2820 		r = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
2821 		assert(r == REP_PROTOCOL_SUCCESS);
2822 		backend_check_upgrade(be, B_TRUE);
2823 		backend_unlock(be);
2824 	}
2825 
2826 	return (CONFIGD_EXIT_OKAY);
2827 }
2828 
2829 /*
2830  * quiesce all database activity prior to exiting
2831  */
2832 void
2833 backend_fini(void)
2834 {
2835 	sqlite_backend_t *be_normal, *be_np;
2836 
2837 	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
2838 	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
2839 }
2840 
2841 #define	QUERY_BASE	128
2842 backend_query_t *
2843 backend_query_alloc(void)
2844 {
2845 	backend_query_t *q;
2846 	q = calloc(1, sizeof (backend_query_t));
2847 	if (q != NULL) {
2848 		q->bq_size = QUERY_BASE;
2849 		q->bq_buf = calloc(1, q->bq_size);
2850 		if (q->bq_buf == NULL) {
2851 			q->bq_size = 0;
2852 		}
2853 
2854 	}
2855 	return (q);
2856 }
2857 
2858 void
2859 backend_query_append(backend_query_t *q, const char *value)
2860 {
2861 	char *alloc;
2862 	int count;
2863 	size_t size, old_len;
2864 
2865 	if (q == NULL) {
2866 		/* We'll discover the error when we try to run the query. */
2867 		return;
2868 	}
2869 
2870 	while (q->bq_buf != NULL) {
2871 		old_len = strlen(q->bq_buf);
2872 		size = q->bq_size;
2873 		count = strlcat(q->bq_buf, value, size);
2874 
2875 		if (count < size)
2876 			break;				/* success */
2877 
2878 		q->bq_buf[old_len] = 0;
2879 		size = round_up_to_p2(count + 1);
2880 
2881 		assert(size > q->bq_size);
2882 		alloc = realloc(q->bq_buf, size);
2883 		if (alloc == NULL) {
2884 			free(q->bq_buf);
2885 			q->bq_buf = NULL;
2886 			break;				/* can't grow */
2887 		}
2888 
2889 		q->bq_buf = alloc;
2890 		q->bq_size = size;
2891 	}
2892 }
2893 
2894 void
2895 backend_query_add(backend_query_t *q, const char *format, ...)
2896 {
2897 	va_list args;
2898 	char *new;
2899 
2900 	if (q == NULL || q->bq_buf == NULL)
2901 		return;
2902 
2903 	va_start(args, format);
2904 	new = sqlite_vmprintf(format, args);
2905 	va_end(args);
2906 
2907 	if (new == NULL) {
2908 		free(q->bq_buf);
2909 		q->bq_buf = NULL;
2910 		return;
2911 	}
2912 
2913 	backend_query_append(q, new);
2914 
2915 	free(new);
2916 }
2917 
2918 void
2919 backend_query_free(backend_query_t *q)
2920 {
2921 	if (q != NULL) {
2922 		if (q->bq_buf != NULL) {
2923 			free(q->bq_buf);
2924 		}
2925 		free(q);
2926 	}
2927 }
2928