xref: /titanic_50/usr/src/uts/common/os/retire_store.c (revision aab83bb83be7342f6cfccaed8d5fe0b2f404855d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/sunndi.h>
29 #include <sys/ddi_impldefs.h>
30 #include <sys/ddi_implfuncs.h>
31 #include <sys/list.h>
32 #include <sys/reboot.h>
33 #include <sys/sysmacros.h>
34 #include <sys/console.h>
35 #include <sys/devcache.h>
36 
/*
 * Names of the nvpairs kept in each per-device sub-nvlist of the
 * I/O retire store.
 */
#define	RIO_STORE_VERSION_STR	"rio-store-version"
#define	RIO_STORE_MAGIC_STR	"rio-store-magic"
#define	RIO_STORE_FLAGS_STR	"rio-store-flags"

/*
 * Store format version written by the encoder; the decoder rejects
 * entries carrying any other version.
 */
#define	RIO_STORE_VERSION_1	1
#define	RIO_STORE_VERSION	RIO_STORE_VERSION_1

/*
 * Flag bits carried by a retire list element.  Only the RETIRED bit is
 * ever written to the store; the BYPASS bit exists in-core only.
 */
typedef enum rio_store_flags {
	RIO_STORE_F_INVAL	= 0x0,
	RIO_STORE_F_RETIRED	= 0x1,
	RIO_STORE_F_BYPASS	= 0x2
} rio_store_flags_t;
56 
57 typedef struct rio_store {
58 	char			*rst_devpath;
59 	rio_store_flags_t	rst_flags;
60 	list_node_t		rst_next;
61 } rio_store_t;
62 
63 #define	RIO_STORE_MAGIC		0x601fcace	/* retire */
64 
65 static int rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name);
66 static int rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl);
67 static void retire_list_free(nvf_handle_t  nvfh);
68 
69 
70 /*
71  * Retire I/O persistent store registration info
72  */
73 static nvf_ops_t rio_store_ops = {
74 	"/etc/devices/retire_store",	/* path to store */
75 	rio_store_decode,		/* decode nvlist into retire_list */
76 	rio_store_encode,		/* encode retire_list into nvlist */
77 	retire_list_free,		/* free retire_list */
78 	NULL				/* write complete callback */
79 };
80 
81 static nvf_handle_t	rio_store_handle;
82 static char		store_path[MAXPATHLEN];
83 static int		store_debug = 0;
84 static int		bypass_msg = 0;
85 static int		retire_msg = 0;
86 
/*
 * Bits that may be set in store_debug.
 */
#define	STORE_DEBUG	0x0001
#define	STORE_TRACE	0x0002

/*
 * Conditional cmn_err() wrappers.  "args" is a complete, parenthesized
 * cmn_err() argument list, e.g.
 *
 *	STORE_DBG((CE_NOTE, "store: %s", path));
 *
 * Each macro expands to a single do/while (0) statement so that it can
 * be used as the body of an unbraced if/else without capturing the
 * caller's "else".
 */
#define	STORE_DBG(args)							\
	do {								\
		if (store_debug & STORE_DEBUG)				\
			cmn_err args;					\
	} while (0)

#define	STORE_TRC(args)							\
	do {								\
		if (store_debug & STORE_TRACE)				\
			cmn_err args;					\
	} while (0)
92 
/*
 * Tunable: set to non-zero to bypass the persistent retire store.
 *
 * The simple read disable offered by the caching framework (see
 * devcache.c) is deliberately not used for this purpose, because it
 * does not have the desired effect.  A simple read disable would
 *
 *	1. cause any addition to the cache to destroy the existing
 *	   on-disk cache, and
 *
 *	2. prevent deletions from the existing on-disk cache, which
 *	   are needed to recover from bad retire decisions.
 *
 * With this tunable the store is still read, but every entry read is
 * flagged as bypassed and is not treated as retired.
 */
int	ddi_retire_store_bypass = 0;
110 
111 
112 
113 /*
114  * Initialize retire store data structures
115  */
116 void
retire_store_init(void)117 retire_store_init(void)
118 {
119 	if (boothowto & RB_ASKNAME) {
120 
121 		printf("Retire store [%s] (/dev/null to bypass): ",
122 		    rio_store_ops.nvfr_cache_path);
123 		console_gets(store_path, sizeof (store_path) - 1);
124 		store_path[sizeof (store_path) - 1] = '\0';
125 
126 		if (strcmp(store_path, "/dev/null") == 0) {
127 			ddi_retire_store_bypass = 1;
128 		} else if (store_path[0] != '\0') {
129 			if (store_path[0] != '/') {
130 				printf("Invalid store path: %s. Using default"
131 				    "\n", store_path);
132 			} else {
133 				rio_store_ops.nvfr_cache_path = store_path;
134 			}
135 		}
136 	}
137 
138 	rio_store_handle = nvf_register_file(&rio_store_ops);
139 
140 	list_create(nvf_list(rio_store_handle), sizeof (rio_store_t),
141 	    offsetof(rio_store_t, rst_next));
142 }
143 
144 /*
145  * Read and populate the in-core retire store
146  */
147 void
retire_store_read(void)148 retire_store_read(void)
149 {
150 	rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
151 	ASSERT(list_head(nvf_list(rio_store_handle)) == NULL);
152 	(void) nvf_read_file(rio_store_handle);
153 	rw_exit(nvf_lock(rio_store_handle));
154 	STORE_DBG((CE_NOTE, "Read on-disk retire store"));
155 }
156 
157 static void
rio_store_free(rio_store_t * rsp)158 rio_store_free(rio_store_t *rsp)
159 {
160 	int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
161 
162 	ASSERT(rsp);
163 	ASSERT(rsp->rst_devpath);
164 	ASSERT(rsp->rst_flags & RIO_STORE_F_RETIRED);
165 	ASSERT(!(rsp->rst_flags & ~flag_mask));
166 
167 	STORE_TRC((CE_NOTE, "store: freed path: %s", rsp->rst_devpath));
168 
169 	kmem_free(rsp->rst_devpath, strlen(rsp->rst_devpath) + 1);
170 	kmem_free(rsp, sizeof (*rsp));
171 }
172 
173 static void
retire_list_free(nvf_handle_t nvfh)174 retire_list_free(nvf_handle_t  nvfh)
175 {
176 	list_t		*listp;
177 	rio_store_t	*rsp;
178 
179 	ASSERT(nvfh == rio_store_handle);
180 	ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
181 
182 	listp = nvf_list(nvfh);
183 	while (rsp = list_head(listp)) {
184 		list_remove(listp, rsp);
185 		rio_store_free(rsp);
186 	}
187 
188 	STORE_DBG((CE_NOTE, "store: freed retire list"));
189 }
190 
191 static int
rio_store_decode(nvf_handle_t nvfh,nvlist_t * line_nvl,char * name)192 rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name)
193 {
194 	rio_store_t	*rsp;
195 	int32_t		version;
196 	int32_t		magic;
197 	int32_t		flags;
198 	int		rval;
199 
200 	ASSERT(nvfh == rio_store_handle);
201 	ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
202 	ASSERT(name);
203 
204 	version = 0;
205 	rval = nvlist_lookup_int32(line_nvl, RIO_STORE_VERSION_STR, &version);
206 	if (rval != 0 || version != RIO_STORE_VERSION) {
207 		return (EINVAL);
208 	}
209 
210 	magic = 0;
211 	rval = nvlist_lookup_int32(line_nvl, RIO_STORE_MAGIC_STR, &magic);
212 	if (rval != 0 || magic != RIO_STORE_MAGIC) {
213 		return (EINVAL);
214 	}
215 
216 	flags = 0;
217 	rval = nvlist_lookup_int32(line_nvl, RIO_STORE_FLAGS_STR, &flags);
218 	if (rval != 0 || flags != RIO_STORE_F_RETIRED) {
219 		return (EINVAL);
220 	}
221 
222 	if (ddi_retire_store_bypass) {
223 		flags |= RIO_STORE_F_BYPASS;
224 		if (!bypass_msg) {
225 			bypass_msg = 1;
226 			cmn_err(CE_WARN,
227 			    "Bypassing retire store /etc/devices/retire_store");
228 		}
229 	}
230 
231 	rsp = kmem_zalloc(sizeof (rio_store_t), KM_SLEEP);
232 	rsp->rst_devpath = i_ddi_strdup(name, KM_SLEEP);
233 	rsp->rst_flags = flags;
234 	list_insert_tail(nvf_list(nvfh), rsp);
235 
236 	STORE_TRC((CE_NOTE, "store: added to retire list: %s", name));
237 	if (!retire_msg) {
238 		retire_msg = 1;
239 		cmn_err(CE_NOTE, "One or more I/O devices have been retired");
240 	}
241 
242 	return (0);
243 }
244 
245 static int
rio_store_encode(nvf_handle_t nvfh,nvlist_t ** ret_nvl)246 rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl)
247 {
248 	nvlist_t	*nvl;
249 	nvlist_t	*line_nvl;
250 	list_t		*listp;
251 	rio_store_t	*rsp;
252 	int		rval;
253 
254 	ASSERT(nvfh == rio_store_handle);
255 	ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
256 
257 	*ret_nvl = NULL;
258 
259 	nvl = NULL;
260 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
261 	if (rval != 0) {
262 		return (DDI_FAILURE);
263 	}
264 
265 	listp = nvf_list(nvfh);
266 	for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) {
267 		int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
268 		int flags;
269 		ASSERT(rsp->rst_devpath);
270 		ASSERT(!(rsp->rst_flags & ~flag_mask));
271 
272 		line_nvl = NULL;
273 		rval = nvlist_alloc(&line_nvl, NV_UNIQUE_NAME, KM_SLEEP);
274 		if (rval != 0) {
275 			line_nvl = NULL;
276 			goto error;
277 		}
278 
279 		rval = nvlist_add_int32(line_nvl, RIO_STORE_VERSION_STR,
280 			RIO_STORE_VERSION);
281 		if (rval != 0) {
282 			goto error;
283 		}
284 		rval = nvlist_add_int32(line_nvl, RIO_STORE_MAGIC_STR,
285 			RIO_STORE_MAGIC);
286 		if (rval != 0) {
287 			goto error;
288 		}
289 
290 		/* don't save the bypass flag */
291 		flags = RIO_STORE_F_RETIRED;
292 		rval = nvlist_add_int32(line_nvl, RIO_STORE_FLAGS_STR,
293 			flags);
294 		if (rval != 0) {
295 			goto error;
296 		}
297 
298 		rval = nvlist_add_nvlist(nvl, rsp->rst_devpath, line_nvl);
299 		if (rval != 0) {
300 			goto error;
301 		}
302 		nvlist_free(line_nvl);
303 		line_nvl = NULL;
304 	}
305 
306 	*ret_nvl = nvl;
307 	STORE_DBG((CE_NOTE, "packed retire list into nvlist"));
308 	return (DDI_SUCCESS);
309 
310 error:
311 	nvlist_free(line_nvl);
312 	ASSERT(nvl);
313 	nvlist_free(nvl);
314 	return (DDI_FAILURE);
315 }
316 
317 int
e_ddi_retire_persist(char * devpath)318 e_ddi_retire_persist(char *devpath)
319 {
320 	rio_store_t	*rsp;
321 	rio_store_t	*new_rsp;
322 	list_t		*listp;
323 	char		*new_path;
324 
325 	STORE_DBG((CE_NOTE, "e_ddi_retire_persist: entered: %s", devpath));
326 
327 	new_rsp = kmem_zalloc(sizeof (*new_rsp), KM_SLEEP);
328 	new_rsp->rst_devpath = new_path = i_ddi_strdup(devpath, KM_SLEEP);
329 	new_rsp->rst_flags = RIO_STORE_F_RETIRED;
330 
331 	rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
332 
333 	listp = nvf_list(rio_store_handle);
334 	for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) {
335 		int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
336 		ASSERT(!(rsp->rst_flags & ~flag_mask));
337 
338 		/* already there */
339 		if (strcmp(devpath, rsp->rst_devpath) == 0) {
340 			/* explicit retire, clear bypass flag (if any) */
341 			rsp->rst_flags &= ~RIO_STORE_F_BYPASS;
342 			ASSERT(rsp->rst_flags == RIO_STORE_F_RETIRED);
343 			rw_exit(nvf_lock(rio_store_handle));
344 			kmem_free(new_path, strlen(new_path) + 1);
345 			kmem_free(new_rsp, sizeof (*new_rsp));
346 			STORE_DBG((CE_NOTE, "store: already in. Clear bypass "
347 			    ": %s", devpath));
348 			return (0);
349 		}
350 
351 	}
352 
353 	ASSERT(rsp == NULL);
354 	list_insert_tail(listp, new_rsp);
355 
356 	nvf_mark_dirty(rio_store_handle);
357 
358 	rw_exit(nvf_lock(rio_store_handle));
359 
360 	nvf_wake_daemon();
361 
362 	STORE_DBG((CE_NOTE, "store: New, added to list, dirty: %s", devpath));
363 
364 	return (0);
365 }
366 
367 int
e_ddi_retire_unpersist(char * devpath)368 e_ddi_retire_unpersist(char *devpath)
369 {
370 	rio_store_t	*rsp;
371 	rio_store_t	*next;
372 	list_t		*listp;
373 	int		is_dirty = 0;
374 
375 	STORE_DBG((CE_NOTE, "e_ddi_retire_unpersist: entered: %s", devpath));
376 
377 	rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
378 
379 	listp = nvf_list(rio_store_handle);
380 	for (rsp = list_head(listp); rsp; rsp = next) {
381 		next = list_next(listp, rsp);
382 		if (strcmp(devpath, rsp->rst_devpath) != 0)
383 			continue;
384 
385 		list_remove(listp, rsp);
386 		rio_store_free(rsp);
387 
388 		STORE_DBG((CE_NOTE, "store: found in list. Freed: %s",
389 		    devpath));
390 
391 		nvf_mark_dirty(rio_store_handle);
392 		is_dirty = 1;
393 	}
394 
395 	rw_exit(nvf_lock(rio_store_handle));
396 
397 	if (is_dirty)
398 		nvf_wake_daemon();
399 
400 	return (is_dirty);
401 }
402 
403 int
e_ddi_device_retired(char * devpath)404 e_ddi_device_retired(char *devpath)
405 {
406 	list_t		*listp;
407 	rio_store_t	*rsp;
408 	size_t		len;
409 	int		retired;
410 
411 	retired = 0;
412 
413 	rw_enter(nvf_lock(rio_store_handle), RW_READER);
414 
415 	listp = nvf_list(rio_store_handle);
416 	for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) {
417 		int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
418 		ASSERT(!(rsp->rst_flags & ~flag_mask));
419 
420 		/*
421 		 * If the "bypass" flag is set, then the device
422 		 * is *not* retired for the current boot of the
423 		 * system. It indicates that the retire store
424 		 * was read but the devices in the retire store
425 		 * were not retired i.e. effectively the store
426 		 * was bypassed. For why we bother to even read
427 		 * the store when we bypass it, see the comments
428 		 * for the tunable ddi_retire_store_bypass.
429 		 */
430 		if (rsp->rst_flags & RIO_STORE_F_BYPASS) {
431 			STORE_TRC((CE_NOTE, "store: found & bypassed: %s",
432 			    rsp->rst_devpath));
433 			continue;
434 		}
435 
436 		/*
437 		 * device is retired, if it or a parent exists
438 		 * in the in-core list
439 		 */
440 		len = strlen(rsp->rst_devpath);
441 		if (strncmp(devpath, rsp->rst_devpath, len) != 0)
442 			continue;
443 		if (devpath[len] == '\0' || devpath[len] == '/') {
444 			/* exact match or a child */
445 			retired = 1;
446 			STORE_TRC((CE_NOTE, "store: found & !bypassed: %s",
447 			    devpath));
448 			break;
449 		}
450 	}
451 	rw_exit(nvf_lock(rio_store_handle));
452 
453 	return (retired);
454 }
455