/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/ddi.h>
#include <sys/nsc_thread.h>


#include "sd_bcache.h"
#include "sd_trace.h"
#include "sd_io.h"
#include "sd_bio.h"
#include "sd_ft.h"
#include "sd_misc.h"
#include "sd_pcu.h"

/*
 * PCU (aka UPS) handling -
 */
#define	bitmap_next cc_dirty_link
#define	bitmap_tail cc_dirty_next

#define	anon_next cc_dirty_link
#define	anon_tail cc_dirty_next
#define	anon_data cc_data

struct bitmap {
	_sd_cctl_t *bmps;
	int bmaps_per_block;
	int inuse;			/* In use in the _last_ block */
};

#define	SDBC_PCU_MAXSWAPIL  3		/* Watch for 5 fields in ioctl arg. */

struct swapfiles {
	int nswpf;			/* Number of filenames */
	int colsize;			/* In cache blocks */
	char *names[SDBC_PCU_MAXSWAPIL];
};
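
/*
 * Layout of the saved-state file, reconstructed from the flush order
 * in _sdbc_power_flush() below (offsets in units of CACHE_BLOCK_SIZE):
 *
 *	block 0			sdbc_pwf_hdr_t (leading copy)
 *	hdr->string_pool	filenames of the open cache descriptors
 *	hdr->descriptor_pool	one sdbc_pwf_desc_t per open descriptor
 *	per-cd sections		dirty cache blocks (enqueued, failed,
 *				in-progress), each section followed by
 *				that cd's dirty/valid bitmaps
 *	final block		sdbc_pwf_hdr_t (trailing copy)
 */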

static void _sdbc_pcu_cleanup(struct swapfiles *);

/*
 * Forward declare functions containing 64-bit argument types to enforce
 * type-checking.
 */
static int add_bitmap_entry(struct bitmap *bmp, _sd_bitmap_t bits, int any_fail,
    nsc_off_t fba_num);
static int flush_bitmap_list(struct bitmap *bmp, dev_t dev, nsc_off_t *blkno);
static int flush_centry_list(_sd_cd_info_t *cdi, _sd_cctl_t *dirty, dev_t dev,
    nsc_off_t *blkno, int failed, struct bitmap *bmaps);
static int flush_hdr(_sd_cctl_t *hdr, dev_t dev, nsc_off_t blkno);
static int flush_anon_list(_sd_cctl_t *anon_list, dev_t dev, nsc_off_t *blkno);
static void sdbc_anon_copy(caddr_t src, nsc_size_t len, _sd_cctl_t *dest,
    nsc_off_t dest_off);
static void sdbc_anon_get(_sd_cctl_t *src, nsc_off_t src_off, caddr_t dest,
    nsc_size_t len);
static _sd_cctl_t *sdbc_get_anon_list(nsc_size_t bytes);

static int got_hint;			/* did we capture hint at power_lost */
static unsigned int wrthru_hint;	/* saved hint at power_lost */
static int saw_power_lost;

char _sdbc_shutdown_in_progress;
static struct swapfiles swfs;

/*
 * sdbc_get_anon_list - allocate a set of anonymous cache block
 * entries that can pretend to be a single block of data holding
 * a virtual character array with "bytes" entries.
 *
 * returns - the cache block heading the chain.
 */
static _sd_cctl_t *
sdbc_get_anon_list(nsc_size_t bytes)
{
	_sd_cctl_t *list, *prev;
	nsc_size_t i, blks;

	list = NULL;	/* defensive: a zero-byte request returns NULL */
	prev = NULL;
	blks = (bytes + CACHE_BLOCK_SIZE - 1) / CACHE_BLOCK_SIZE;
	for (i = 0; i < blks; i++) {
		list = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0);
		bzero(list->cc_data, CACHE_BLOCK_SIZE);
		list->anon_next = prev;
		prev = list;
	}

	return (list);
}
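
/*
 * For example, assuming an 8K CACHE_BLOCK_SIZE (the real value comes
 * from sd_bcache.h), a request for 10000 bytes rounds up to
 * (10000 + 8191) / 8192 = 2 cache blocks, the second only partially
 * used.
 */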

/*
 * sdbc_anon_get - copy "len" bytes of data out of the virtual
 * character array represented by "src", beginning at index "src_off",
 * into the buffer "dest".
 *
 * src - pointer to our virtual array (chain of cache blocks).
 * src_off - first location to copy data from.
 * dest - buffer to receive the data.
 * len - the number of bytes of data to copy.
 *
 */
static void
sdbc_anon_get(_sd_cctl_t *src, nsc_off_t src_off, caddr_t dest, nsc_size_t len)
{
	nsc_size_t i;
	nsc_size_t nlen;
	nsc_off_t blk_start, blk_end;

	if (len == 0)
		return;

	blk_start = src_off / CACHE_BLOCK_SIZE;
	blk_end = (src_off + len) / CACHE_BLOCK_SIZE;

	for (i = 0; i < blk_start; i++) {
		src = src->anon_next;
		src_off -= CACHE_BLOCK_SIZE;
	}

	nlen = min(len, CACHE_BLOCK_SIZE - src_off);
	bcopy(&src->anon_data[src_off], dest, (size_t)nlen);

	for (i = 1; i < blk_end - blk_start; i++) {
		/* advance to the next block before copying from it */
		src = src->anon_next;
		bcopy(src->anon_data, &dest[nlen], (size_t)CACHE_BLOCK_SIZE);
		nlen += CACHE_BLOCK_SIZE;
	}
	if (nlen != len) {
		src = src->anon_next;
		bcopy(src->anon_data, &dest[nlen], (size_t)(len - nlen));
	}
}
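
/*
 * Usage sketch, mirroring the calls made from _sdbc_power_flush()
 * below: fetch the n-th file descriptor back out of the descriptor
 * pool ("n" and "desc" are illustrative names only).
 *
 *	sdbc_pwf_desc_t desc;
 *
 *	sdbc_anon_get(file_pool, n * sizeof (sdbc_pwf_desc_t),
 *	    (caddr_t)&desc, sizeof (desc));
 */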

/*
 * sdbc_anon_copy - copies "len" bytes of data from "src" to the
 * virtual character array represented by "dest", beginning at index
 * "dest_off".
 *
 * src - pointer to data to copy.
 * len - the number of bytes of data to copy.
 * dest - pointer to our virtual array (chain of cache blocks).
 * dest_off - first location to copy data to.
 *
 */
static void
sdbc_anon_copy(caddr_t src, nsc_size_t len, _sd_cctl_t *dest,
    nsc_off_t dest_off)
{
	nsc_size_t i;
	nsc_size_t nlen;
	nsc_off_t blk_start, blk_end;

	if (len == 0)
		return;

	blk_start = dest_off / CACHE_BLOCK_SIZE;
	blk_end = (dest_off + len) / CACHE_BLOCK_SIZE;

	for (i = 0; i < blk_start; i++) {
		dest = dest->anon_next;
		dest_off -= CACHE_BLOCK_SIZE;
	}

	nlen = min(len, CACHE_BLOCK_SIZE - dest_off);
	bcopy(src, &dest->anon_data[dest_off], (size_t)nlen);

	for (i = 1; i < blk_end - blk_start; i++) {
		/* advance to the next block before copying into it */
		dest = dest->anon_next;
		bcopy(&src[nlen], dest->anon_data, (size_t)CACHE_BLOCK_SIZE);
		nlen += CACHE_BLOCK_SIZE;
	}
	if (nlen != len) {
		dest = dest->anon_next;
		bcopy(&src[nlen], dest->anon_data, (size_t)(len - nlen));
	}
}
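
/*
 * Worked example of the copy phasing above, assuming an 8K
 * CACHE_BLOCK_SIZE: copying 12K at dest_off 4K gives blk_start = 0
 * and blk_end = 2. The first bcopy fills the 4K tail of block 0, the
 * loop moves one full 8K block into block 1, and the final bcopy is
 * skipped because nlen == len by then.
 */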

/*
 * flush_anon_list - flush a chain of anonymous cache blocks
 * to the state file. Anonymous chains of cache blocks represent
 * virtual arrays for the state flushing code and can contain
 * various types of data.
 *
 * anon_list - chain of cache blocks to flush.
 *
 * dev - the state file device.
 *
 * blkno - on input the cache block number to begin writing at.
 * On exit the next cache block number following the data
 * just written.
 *
 * returns - 0 on success, error number on failure.
 */
static int
flush_anon_list(_sd_cctl_t *anon_list, dev_t dev, nsc_off_t *blkno)
{
	struct buf *bp;
	int rc;
	_sd_cctl_t *prev;
	nsc_size_t bcnt;

	if (anon_list == NULL)
		return (0);

	bcnt = 0;
	do {
		bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(*blkno),
		    BLK_TO_FBA_NUM(1), 0);
		sd_add_fba(bp, &anon_list->cc_addr, 0, BLK_FBAS);
		rc = sd_start_io(bp, NULL, NULL, 0);
		(*blkno)++;

		/*
		 * A failure here is fatal; it is not clear that
		 * anything else can usefully be done.
		 */

		if (rc != NSC_DONE)
			return (rc);
		bcnt++;

		prev = anon_list;
		anon_list = anon_list->anon_next;
		_sd_centry_release(prev);

	} while (anon_list);

	cmn_err(CE_CONT, "sdbc(flush_anon_list) %" NSC_SZFMT "\n", bcnt);
	return (0);
}

/*
 * start_bitmap_list - initialize a bitmap descriptor, allocating an
 * anonymous cache block entry to anchor a chain of cache blocks
 * representing a virtual array of bitmap entries.
 *
 * bmp - the bitmap descriptor to initialize.
 * bpb - the number of bitmap entries each cache block can hold.
 */
static void
start_bitmap_list(struct bitmap *bmp, int bpb)
{
	_sd_cctl_t *list;

	list = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0);
	bzero(list->cc_data, CACHE_BLOCK_SIZE);
	list->bitmap_next = NULL;
	list->bitmap_tail = list;

	bmp->bmps = list;
	bmp->inuse = 0;
	bmp->bmaps_per_block = bpb;
}

/*
 * add_bitmap_entry - Add a bitmap entry to the chain of bitmap
 * entries we are creating for the cd's entry in the state file.
 *
 * Bitmaps are stored in a chain of anonymous cache blocks. Each
 * cache block can hold bmaps_per_block entries. As each block is
 * filled a new block is added to the tail of the chain.
 *
 * bmp - the bitmap descriptor anchoring the chain of cache blocks.
 * bits - the bitmap entry to add.
 * any_fail - flag saying whether the data corresponding to this
 * bitmap entry had previously failed going to disk.
 * fba_num - FBA number corresponding to the entry.
 *
 * returns - 0 on success, error number on failure.
 */
static int
add_bitmap_entry(struct bitmap *bmp,
    _sd_bitmap_t bits, int any_fail, nsc_off_t fba_num)
{
	sdbc_pwf_bitmap_t *bmap;
	_sd_cctl_t *list = bmp->bmps;
	int i;

	if (bmp->inuse == bmp->bmaps_per_block) {
		_sd_cctl_t *nlist;

		nlist = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0);
		bzero(nlist->cc_data, CACHE_BLOCK_SIZE);
		nlist->bitmap_next = NULL;
		nlist->bitmap_tail = NULL;
		list->bitmap_tail->bitmap_next = nlist;
		list->bitmap_tail = nlist;
		bmp->inuse = 0;
	}
	/* point at the tail block only after any new block is appended */
	bmap = (sdbc_pwf_bitmap_t *)list->bitmap_tail->cc_data;
	i = bmp->inuse++;
	bmap->bitmaps[i].fba_num = fba_num;
	bmap->bitmaps[i].dirty = bits;
	bmap->bitmaps[i].errs = (char)any_fail;

	return (0);
}
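
/*
 * On recovery, entry n of a cd's bitmap array is found by inverting
 * the packing above; a minimal sketch (variable names illustrative,
 * not part of this driver):
 *
 *	blk  = n / bmaps_per_block;	(cache block within the chain)
 *	slot = n % bmaps_per_block;	(index into bmap->bitmaps[])
 *
 * where bmaps_per_block is written out as hdr->bmap_size by
 * _sdbc_power_flush().
 */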

/*
 * flush_bitmap_list - flush a chain of anonymous cache blocks
 * containing the dirty/valid bitmaps for a set of cache blocks.
 *
 * bmp - the bitmap descriptor anchoring the chain of bitmap data.
 * dev - the state file device.
 * blkno - on input the cache block number to begin writing at.
 * On exit the next cache block number following the data
 * just written.
 *
 * returns - 0 on success, error number on failure.
 */
static int
flush_bitmap_list(struct bitmap *bmp, dev_t dev, nsc_off_t *blkno)
{
	_sd_cctl_t *b_list;
	struct buf *bp;
	int rc;
	_sd_cctl_t *prev;
	int bcnt = 0;	/* P3 temp */

	if ((b_list = bmp->bmps) == NULL)
		return (0);

	do {
		bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(*blkno),
		    BLK_TO_FBA_NUM(1), 0);
		sd_add_fba(bp, &b_list->cc_addr, 0, BLK_FBAS);
		rc = sd_start_io(bp, NULL, NULL, 0);
		(*blkno)++;

		/*
		 * A failure here is fatal; it is not clear that
		 * anything else can usefully be done.
		 */

		if (rc != NSC_DONE)
			return (rc);
		bcnt++;

		prev = b_list;
		b_list = b_list->bitmap_next;
		_sd_centry_release(prev);

	} while (b_list);
	cmn_err(CE_CONT, "sdbc(flush_bitmap_list) %d\n", bcnt);  /* P3 */

	return (0);
}

/*
 * flush_centry_list - flush a chain of cache blocks for the
 * cache descriptor described by "cdi" to the state file.
 * In addition the bitmaps describing the validity and dirty
 * state of each entry are captured to the bitmap chain.
 *
 * cdi - pointer to description of the cd we are writing.
 * dirty - chain of dirty cache blocks to flush, linked by
 * cc_dirty_next (sequential runs) and cc_dirty_link (disjoint runs).
 *
 * dev - the state file device.
 *
 * blkno - on input the cache block number to begin writing at.
 * On exit the next cache block number following the data
 * just written.
 *
 * failed - a flag noting whether these blocks had already
 * been attempted to write to their true destination and
 * failed. (i.e. is the chain from fail_head).
 *
 * bmaps - a chain of anonymous cache blocks containing all
 * the dirty/valid bitmaps for the cache blocks we write.
 *
 * returns - 0 on success, error number on failure.
 */
static int
flush_centry_list(_sd_cd_info_t *cdi, _sd_cctl_t *dirty, dev_t dev,
    nsc_off_t *blkno, int failed, struct bitmap *bmaps)
{
	_sd_cctl_t *cc_ent;
	nsc_size_t count; /* count of cache blocks in a sequential chain */
	struct buf *bp;
	int rc;
	int bcnt = 0;

	if (dirty == NULL)
		return (0);

	mutex_enter(&cdi->cd_lock);

	do {
		/*
		 * each cache block is written to the disk regardless of its
		 * valid/dirty masks.
		 */
		count = 0;
		cc_ent = dirty;
		do {
			count++;
			cc_ent = cc_ent->cc_dirty_next;
		} while (cc_ent);

		bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(*blkno),
		    BLK_TO_FBA_NUM(count), 0);

		cc_ent = dirty;
		do {
			sd_add_fba(bp, &cc_ent->cc_addr, 0, BLK_FBAS);
			rc = add_bitmap_entry(bmaps,
			    cc_ent->cc_dirty | cc_ent->cc_toflush, failed,
			    BLK_TO_FBA_NUM(CENTRY_BLK(cc_ent)));
			if (rc) {
				/* don't leak cd_lock on the error path */
				mutex_exit(&cdi->cd_lock);
				return (rc);
			}
			cc_ent = cc_ent->cc_dirty_next;
		} while (cc_ent);

		*blkno += count;
		rc = sd_start_io(bp, NULL, NULL, 0);

		/*
		 * A failure here is fatal; it is not clear that
		 * anything else can usefully be done.
		 */

		if (rc != NSC_DONE) {
			mutex_exit(&cdi->cd_lock);
			return (rc);
		}
		bcnt += count;

		dirty = dirty->cc_dirty_link;
	} while (dirty);
	cmn_err(CE_CONT, "sdbc(flush_centry_list) %d\n", bcnt);  /* P3 */

	mutex_exit(&cdi->cd_lock);
	return (0);
}
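
/*
 * Shape of the dirty chains walked above: cc_dirty_next links cache
 * blocks that are contiguous on disk (written with a single i/o), and
 * cc_dirty_link starts the next, disjoint run, e.g.:
 *
 *	dirty -> [blk 10] -> [blk 11] -> [blk 12]	(cc_dirty_next)
 *	   |
 *	[blk 40] -> [blk 41]				(cc_dirty_link,
 *							then cc_dirty_next)
 */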

/*
 * flush_hdr - Flush the state file header to the disk partition
 * "dev" at FBA "blkno". Return the result of the i/o operation.
 * hdr - a cache block containing the header.
 * dev - the state file device.
 * blkno - cache block position to write the header.
 *
 * returns - 0 on success, error number on failure.
 */
static int
flush_hdr(_sd_cctl_t *hdr, dev_t dev, nsc_off_t blkno)
{
	struct buf *bp;
	int rc;

	bp = sd_alloc_iob(dev, BLK_TO_FBA_NUM(blkno), BLK_TO_FBA_NUM(1), 0);
	sd_add_fba(bp, &hdr->cc_addr, 0, BLK_FBAS);
	rc = sd_start_io(bp, NULL, NULL, 0);
	_sd_centry_release(hdr);
	return (rc);
}

/*
 * _sdbc_power_flush - flush the state of sdbc to the state "file"
 * on the system disk. All dirty blocks (in progress, unscheduled,
 * failed) are written along with the bitmap for each block. The
 * data is written using normal sdbc i/o via anonymous cache blocks.
 * This is done to simplify the job here (and to limit memory
 * requests) at the expense of making the recovery programs more
 * complex. Since recovery is done at user level this seems to be
 * a good trade off.
 *
 * Returns: 0 on success, error number on failure.
 */
static int
_sdbc_power_flush(void)
{
	_sd_cctl_t *name_pool;
	int string_size;

	sdbc_pwf_hdr_t *hdr;
	_sd_cctl_t *hdrblk;

	struct bitmap bmap;

	_sd_cd_info_t *cdi;
	int open_files;
	_sd_cctl_t *file_pool;
	sdbc_pwf_desc_t current;

	nsc_fd_t *state_fd;
	dev_t state_rdev;
	int devmaj, devmin;
	nsc_off_t blkno;
	long len;
	long total_len;
	int pending;
	int rc = 0;

	/*
	 * Force wrthru just in case SLM software didn't really send us a
	 * warning. (Also makes for easier testing)
	 */
	(void) _sd_set_node_hint(NSC_FORCED_WRTHRU);
	/* disable all (dangerous) cache entry points */

	cmn_err(CE_CONT, "sdbc(sdbc_power_flush) hint set..\n"); /* P3 */

	_sdbc_shutdown_in_progress = 1;

#if 0
	if (sdbc_io && (rc = nsc_unregister_io(sdbc_io, NSC_PCATCH)) != 0) {
		/*
		 * this is bad, in theory we could just busy-out all our
		 * interfaces and continue.
		 */
		cmn_err(CE_WARN,
		    "sdbc(_sdbc_power_flush) couldn't unregister i/o %d", rc);
		return (rc);
	}

	sdbc_io = NULL;
#endif

	/* wait for all i/o to finish/timeout ? */

	if ((pending = _sdbc_wait_pending()) != 0)
		cmn_err(CE_NOTE, "sdbc(_sdbc_power_flush) %d I/Os were"
		    " pending at power shutdown", pending);

	cmn_err(CE_CONT, "sdbc(sdbc_power_flush) over pending\n"); /* P3 */

	/* prevent any further async flushing */

	_sdbc_flush_deconfigure();

	/*
	 * At this point no higher level clients should be able to get thru.
	 * Failover i/o from the other node is our only other concern as
	 * far as disturbing the state of sdbc.
	 */

	/* figure out the names for the string pool */

	string_size = 0;
	open_files = 0;
	cdi = _sd_cache_files;
	do {

		if (cdi->cd_info == NULL)
			continue;
		if (cdi->cd_info->sh_alloc == 0)
			continue;
		open_files++;
		string_size += strlen(cdi->cd_info->sh_filename) + 1;
	} while (++cdi != &_sd_cache_files[sdbc_max_devs]);

	if (open_files == 0) {
		return (0);
	}

	hdrblk = sdbc_centry_alloc_blks(_CD_NOHASH, 0, 1, 0);
	bzero(hdrblk->cc_data, CACHE_BLOCK_SIZE);
	hdr = (sdbc_pwf_hdr_t *)hdrblk->cc_data;
	hdr->magic = SDBC_PWF_MAGIC;
	hdr->alignment = CACHE_BLOCK_SIZE;
	hdr->cd_count = open_files;
	/* XXX bmap_size is redundant */
	hdr->bmap_size = CACHE_BLOCK_SIZE / sizeof (sdbc_pwf_bitmap_t);

	name_pool = sdbc_get_anon_list(string_size);
	file_pool = sdbc_get_anon_list(sizeof (sdbc_pwf_desc_t) * open_files);

	open_files = 0;
	cdi = _sd_cache_files;
	total_len = 0;
	do {

		if (cdi->cd_info == NULL)
			continue;
		if (cdi->cd_info->sh_alloc == 0)
			continue;

		len = strlen(cdi->cd_info->sh_filename) + 1;

		/* copy the name to string pool */
		sdbc_anon_copy(cdi->cd_info->sh_filename,
		    len, name_pool, total_len);

		bzero(&current, sizeof (current));
		current.name = total_len;
		sdbc_anon_copy((caddr_t)&current, sizeof (current), file_pool,
		    open_files * sizeof (sdbc_pwf_desc_t));

		open_files++;
		total_len += len;

	} while (++cdi != &_sd_cache_files[sdbc_max_devs]);

	/* flush dirty data */

	if (swfs.nswpf == 0 || swfs.names[0] == NULL) {
		cmn_err(CE_WARN, "sdbc(_sdbc_power_flush): State file"
		    " is not configured");
		rc = ENODEV;
		goto cleanup;
	}

	if (!(state_fd =
	    nsc_open(swfs.names[0], NSC_DEVICE, NULL, NULL, &rc)) ||
	    !nsc_getval(state_fd, "DevMaj", (int *)&devmaj) ||
	    !nsc_getval(state_fd, "DevMin", (int *)&devmin)) {
		if (state_fd) {
			(void) nsc_close(state_fd);
		}
		/*
		 * We are hosed big time. We can't open the device on
		 * which the saved state file is to be written.
		 */
		cmn_err(CE_WARN, "sdbc(_sdbc_power_flush): Couldn't "
		    "open %s for saved state file", swfs.names[0]);
		rc = EIO;
		goto cleanup;
	}

	state_rdev = makedevice(devmaj, devmin);

	blkno = 1;

	hdr->string_pool = blkno;
	rc = flush_anon_list(name_pool, state_rdev, &blkno);

	hdr->descriptor_pool = blkno;
	rc = flush_anon_list(file_pool, state_rdev, &blkno);

	/*
	 * iterate across all devices, flushing the data and collecting bitmaps
	 */

	open_files = 0;
	for (cdi = _sd_cache_files;
	    cdi != &_sd_cache_files[sdbc_max_devs]; cdi++) {
		nsc_off_t blk2;
		nsc_off_t fp_off;

		if (cdi->cd_info == NULL)
			continue;
		if (cdi->cd_info->sh_alloc == 0)
			continue;

		/* retrieve the file description so we can update it */
		fp_off = (open_files++) * sizeof (sdbc_pwf_desc_t);
		sdbc_anon_get(file_pool, fp_off,
		    (caddr_t)&current, sizeof (current));

		current.blocks = blkno;

		if (cdi->cd_io_head) {
			/*
			 * Need to wait for this to timeout?
			 * Seems like worst case we just write the data twice
			 * so we should be ok.
			 */
			/*EMPTY*/
			;
		}

		start_bitmap_list(&bmap, hdr->bmap_size);

		/* Flush the enqueued dirty data blocks */

		(void) flush_centry_list(cdi, cdi->cd_dirty_head, state_rdev,
		    &blkno, 0, &bmap);
		cdi->cd_dirty_head = NULL;
		cdi->cd_dirty_tail = NULL;

		/* Flush the failed dirty data blocks */

		(void) flush_centry_list(cdi, cdi->cd_fail_head, state_rdev,
		    &blkno, 1, &bmap);
		cdi->cd_fail_head = NULL;

		/*
		 * Flush the in progress dirty data blocks. These should
		 * really be null by now. Worst case we write the data again
		 * on recovery, as we know the dirty masks won't change since
		 * the flusher is stopped.
		 */

		(void) flush_centry_list(cdi, cdi->cd_io_head, state_rdev,
		    &blkno, 0, &bmap);
		cdi->cd_io_head = NULL;
		cdi->cd_io_tail = NULL;

		current.bitmaps = blkno;
		current.nblocks = blkno - current.blocks;

		(void) flush_bitmap_list(&bmap, state_rdev, &blkno);

		/* update the current cd's file description */
		sdbc_anon_copy((caddr_t)&current, sizeof (current), file_pool,
		    fp_off);

		blk2 = hdr->descriptor_pool;
		rc = flush_anon_list(file_pool, state_rdev, &blk2);
	}

#if !defined(_SunOS_5_6)
	hdr->dump_time = ddi_get_time();
#else
	hdr->dump_time = hrestime.tv_sec;
#endif
	/* write the header at front and back */
	(void) flush_hdr(hdrblk, state_rdev, blkno);
	(void) flush_hdr(hdrblk, state_rdev, 0L);

	/* P3 */
	cmn_err(CE_CONT, "sdbc(sdbc_power_flush) %" NSC_SZFMT " total\n",
	    blkno);

cleanup:
	return (rc);
}

/*
 * _sdbc_power_lost - System is running on UPS power; we have "rideout"
 * minutes of power left prior to shutdown. Get into a state where we
 * will be ready should we need to shutdown.
 *
 * ARGUMENTS:
 *	rideout - minutes of power left prior to shutdown.
 */
void
_sdbc_power_lost(int rideout)
{
	cmn_err(CE_WARN, "sdbc(_sdbc_power_lost) battery time "
	    "remaining %d minute(s)", rideout);

	got_hint = 1;
	if (_sd_get_node_hint(&wrthru_hint))
		got_hint = 0;

	cmn_err(CE_WARN, "sdbc(_sdbc_power_lost) got hint %d "
	    "hint 0x%x", got_hint, wrthru_hint);

	(void) _sd_set_node_hint(NSC_FORCED_WRTHRU);
	saw_power_lost = 1;
}

/*
 * _sdbc_power_ok - System is back running on mains power after
 * seeing a power fail. Return to normal power up operation.
 *
 */
void
_sdbc_power_ok(void)
{
	cmn_err(CE_WARN, "sdbc(_sdbc_power_ok) power ok");
	if (saw_power_lost && got_hint) {
		/*
		 * In theory we have a race between _sdbc_power_lost
		 * and here. However it is expected that the power ioctls
		 * that cause these to be generated are sequential in
		 * nature, so there is no race.
		 */
		saw_power_lost = 0;
		if (wrthru_hint & _SD_WRTHRU_MASK)
			(void) _sd_set_node_hint(wrthru_hint & _SD_WRTHRU_MASK);
		else
			(void) _sd_clear_node_hint(_SD_WRTHRU_MASK);
	}
}

/*
 * _sdbc_power_down - System is running on UPS power and we must stop
 * operation as the machine is now going down. Flush the cache state
 * to the state file.
 *
 * When we return all cache activity will be blocked.
 */
void
_sdbc_power_down(void)
{
	cmn_err(CE_WARN, "sdbc(_sdbc_power_down) powering down...");
	(void) _sdbc_power_flush();
}

/*
 * Configure safe store from the general cache configuration ioctl.
 */
int
_sdbc_pcu_config(int namec, char **namev)
{
	int i;

	if (swfs.nswpf != 0) {
		/*
		 * This should not happen because cache protects itself
		 * from double configuration in sd_conf.c.
		 */
		cmn_err(CE_CONT, "sdbc(_sdbc_pcu_config) double "
		    "configuration of Safe Store\n");
		return (EINVAL);
	}
	/* defensive: don't overrun the fixed-size names[] array */
	if (namec > SDBC_PCU_MAXSWAPIL)
		return (EINVAL);
	swfs.colsize = 32;	/* No way to configure in the general ioctl */

	for (i = 0; i < namec; i++) {
		if ((swfs.names[i] = kmem_alloc(strlen(namev[i])+1,
		    KM_NOSLEEP)) == NULL) {
			_sdbc_pcu_cleanup(&swfs);
			return (ENOMEM);
		}
		swfs.nswpf++;
		(void) strcpy(swfs.names[i], namev[i]);
	}

	return (0);
}
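
/*
 * Example (illustrative only; the device path is a placeholder):
 * configuring a single raw device as the safe store state file.
 *
 *	char *names[1] = { "/dev/rdsk/c0t0d0s7" };
 *
 *	if (_sdbc_pcu_config(1, names) != 0)
 *		cmn_err(CE_WARN, "safe store config failed");
 */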

/*
 * _sdbc_pcu_unload - tear down the safe store configuration at
 * driver unload.
 */
void
_sdbc_pcu_unload()
{
	_sdbc_pcu_cleanup(&swfs);
}

/*
 * Destructor for struct swapfiles.
 */
static void
_sdbc_pcu_cleanup(struct swapfiles *swp)
{
	int i;
	char *s;

	for (i = 0; i < swp->nswpf; i++) {
		if ((s = swp->names[i]) != NULL)
			kmem_free(s, strlen(s)+1);
		swp->names[i] = NULL;
	}
	swp->nswpf = 0;
}
875