/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * NAME:	raid_resync.c
 * DESCRIPTION: RAID driver source file containing routines related to the
 *		resync operation.
 * ROUTINES PROVIDED FOR EXTERNAL USE:
 *	resync_request() - get resync lock if available
 *	release_resync_request() - relinquish resync lock
 *	erred_check_line() - provide write instruction for erred column
 *	init_pw_area() - initialize pre-write area
 *	copy_pw_area() - copy pre-write area from one device to another
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/lvm/md_raid.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

#define NOCOLUMN (-1)

extern md_set_t md_set[];
extern kmem_cache_t *raid_child_cache;
extern kmem_cache_t *raid_parent_cache;
extern md_resync_t md_cpr_resync;
extern major_t md_major;
extern void raid_parent_init(md_raidps_t *ps);
extern void raid_child_init(md_raidcs_t *cs);

/*
 * NAME:	xor
 * DESCRIPTION: Xor two chunks of data together. The data referenced by
 *		addr1 and addr2 are xor'd together for size bytes and written
 *		into addr1.
 * PARAMETERS:	caddr_t addr1 - address of first chunk of data and destination
 *		caddr_t addr2 - address of second chunk of data
 *		size_t size - number of bytes to xor
 */
static void
xor(caddr_t addr1, caddr_t addr2, size_t size)
{
    while (size--) {
        *addr1++ ^= *addr2++;
    }
}
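
/*
 * Background note (a sketch, not driver state): resync relies on the
 * RAID-5 parity identity that the parity column is the XOR of all data
 * columns, so any single missing column can be rebuilt by xor'ing every
 * surviving column together. With hypothetical bytes d0 = 0x5A and
 * d1 = 0x3C, parity p = d0 ^ d1 = 0x66, and a lost d1 is recovered as
 * p ^ d0 = 0x66 ^ 0x5A = 0x3C. raid_resync_region() below accumulates
 * the surviving columns with xor() in exactly this way before writing
 * the rebuilt data out.
 */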

/*
 * NAME:	release_resync_request
 *
 * DESCRIPTION: Release resync active flag and reset unit values accordingly.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *
 * LOCKS:	Expects Unit Writer Lock to be held across call.
 */
void
release_resync_request(
    minor_t mnum
)
{
    mr_unit_t *un;

    un = MD_UNIT(mnum);
    ASSERT(un != NULL);

    un->c.un_status &= ~MD_UN_RESYNC_ACTIVE;

    un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC;
    un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC_ERRED;
    un->un_column[un->un_resync_index].un_devflags &=
        ~(MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC);

    un->un_resync_line_index = 0;
    un->un_resync_index = NOCOLUMN;
}

/*
 * NAME:	resync_request
 *
 * DESCRIPTION: Request resync. If resync is available (no currently active
 *		resync), mark the unit as resync active and initialize.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		int column_index - index of column to resync
 *		size_t copysize - copysize of ioctl request
 *		md_error_t *mde - error output parameter
 *
 * RETURN:	0 if resync is available, nonzero otherwise.
 *
 * LOCKS:	Expects Unit Writer Lock to be held across call.
 *
 * NOTE:	Sets un_resync_copysize to the input value in copysize, the
 *		existing value from an incomplete previous resync with an
 *		input value in copysize, or the lesser of the unit segment
 *		size or maxio.
 */
/* ARGSUSED */
int
resync_request(
    minor_t mnum,
    int column_index,
    size_t copysize,
    md_error_t *mde
)
{
    mr_unit_t *un;

    un = MD_UNIT(mnum);
    ASSERT(un != NULL);

    /*
     * bail out if a resync or grow is already active on the unit,
     * unless this column is already marked for resync
     */
    if (! (un->un_column[column_index].un_devflags & MD_RAID_RESYNC) &&
        ((un->c.un_status & MD_UN_RESYNC_ACTIVE) ||
        (un->c.un_status & MD_UN_GROW_PENDING) ||
        (un->un_column[column_index].un_devstate & RCS_RESYNC))) {
        if (mde)
            return (mdmderror(mde, MDE_GROW_DELAYED, mnum));
        return (1);
    }

    if (un->un_column[column_index].un_devstate &
        (RCS_ERRED | RCS_LAST_ERRED))
        un->un_column[column_index].un_devflags |= MD_RAID_DEV_ERRED;
    else
        un->un_column[column_index].un_devflags &= ~MD_RAID_DEV_ERRED;
    un->c.un_status |= MD_UN_RESYNC_ACTIVE;
    un->un_resync_index = column_index;
    un->un_resync_line_index = 0;
    raid_set_state(un, column_index, RCS_RESYNC, 0);

    return (0);
}
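
/*
 * Typical flow (a sketch of one caller path, not the only one): an
 * ioctl handler holding the unit writer lock calls resync_request() to
 * claim the resync, then raid_resync_unit() below spawns the resync
 * thread, which runs resync_comp() and finally calls
 * release_resync_request() when the component is done.
 */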

/*
 * NAME:	alloc_bufs
 *
 * DESCRIPTION: Allocate the resync data and parity buffers hung off the
 *		child save structure.
 *
 * PARAMETERS:	md_raidcs_t *cs - child save structure
 *		size_t bsize - size of each buffer
 */
static void
alloc_bufs(md_raidcs_t *cs, size_t bsize)
{
    /* allocate buffers; the write reuses the first read buffer (cs_pbuffer) */
    cs->cs_dbuffer = kmem_zalloc(bsize, KM_SLEEP);
    cs->cs_pbuffer = kmem_zalloc(bsize, KM_SLEEP);
}

void
init_buf(buf_t *bp, int flags, size_t size)
{
    /* zero buf */
    bzero((caddr_t)bp, sizeof (buf_t));

    /* set b_back and b_forw to point back to buf */
    bp->b_back = bp;
    bp->b_forw = bp;

    /* set flags and size */
    bp->b_flags = flags;
    bp->b_bufsize = size;
    bp->b_offset = -1;

    /* setup semaphores */
    sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
    sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
}

void
destroy_buf(buf_t *bp)
{
    sema_destroy(&bp->b_io);
    sema_destroy(&bp->b_sem);
}

void
reset_buf(buf_t *bp, int flags, size_t size)
{
    destroy_buf(bp);
    init_buf(bp, flags, size);
}
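
/*
 * Usage note (descriptive only): these helpers manage a privately owned
 * buf_t. init_buf() prepares it once, reset_buf() tears down and
 * re-initializes the semaphores so the buf can go through another
 * md_call_strategy()/biowait() cycle, and destroy_buf() releases the
 * semaphores when done; init_pw_area() below follows this pattern.
 */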

/*
 * NAME:	free_bufs
 *
 * DESCRIPTION: Free up the resync buffers.
 *
 * PARAMETERS:	size_t bsize - size of each buffer
 *		md_raidcs_t *cs - child save structure
 */
static void
free_bufs(size_t bsize, md_raidcs_t *cs)
{
    kmem_free(cs->cs_dbuffer, bsize);
    kmem_free(cs->cs_pbuffer, bsize);
}

/*
 * NAME:	init_pw_area
 *
 * DESCRIPTION: Initialize pre-write area to all zeros.
 *
 * PARAMETERS:	mr_unit_t *un - raid unit structure
 *		md_dev64_t dev_to_write - device whose pre-write area is
 *			initialized
 *		diskaddr_t pwstart - starting block of the pre-write area
 *		uint_t col - index of column being resynced
 *
 * RETURN:	1 if write error on resync device, otherwise 0
 *
 * LOCKS:	Expects Unit Reader Lock to be held across call.
 */
int
init_pw_area(
    mr_unit_t *un,
    md_dev64_t dev_to_write,
    diskaddr_t pwstart,
    uint_t col
)
{
    buf_t buf;
    caddr_t databuffer;
    size_t copysize;
    size_t bsize;
    int error = 0;
    int i;

    ASSERT(un != NULL);
    ASSERT(un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN);

    bsize = un->un_iosize;
    copysize = dbtob(bsize);
    databuffer = kmem_zalloc(copysize, KM_SLEEP);
    init_buf(&buf, (B_BUSY | B_WRITE), copysize);

    for (i = 0; i < un->un_pwcnt; i++) {
        /* magic field is 0 for 4.0 compatibility */
        RAID_FILLIN_RPW(databuffer, un, 0, 0,
            0, 0, 0,
            0, col, 0);
        buf.b_un.b_addr = (caddr_t)databuffer;
        buf.b_edev = md_dev64_to_dev(dev_to_write);
        buf.b_bcount = dbtob(bsize);
        buf.b_lblkno = pwstart + (i * un->un_iosize);

        /* write buf */
        (void) md_call_strategy(&buf, MD_STR_NOTTOP, NULL);

        if (biowait(&buf)) {
            error = 1;
            break;
        }
        reset_buf(&buf, (B_BUSY | B_WRITE), copysize);
    } /* for */

    destroy_buf(&buf);
    kmem_free(databuffer, copysize);

    return (error);
}
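
/*
 * Layout note (a sketch of the arithmetic above, not new driver state):
 * the pre-write area is un_pwcnt slots of un_iosize blocks each, laid
 * out back to back from pwstart. With, say, pwstart = 1024 and
 * un_iosize = 16, the loop writes zeroed pre-write headers at logical
 * blocks 1024, 1040, 1056, and so on for un_pwcnt iterations.
 */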

/*
 * NAME:	raid_open_alt
 *
 * DESCRIPTION: Opens the alt device used during resync.
 *
 * PARAMETERS:	mr_unit_t *un - raid unit structure
 *		int index - raid column
 *
 * RETURN:	0 - successful
 *		1 - failed
 *
 * LOCKS:	requires unit writer lock
 */

static int
raid_open_alt(mr_unit_t *un, int index)
{
    mr_column_t *column = &un->un_column[index];
    set_t setno = MD_MIN2SET(MD_SID(un));
    side_t side = mddb_getsidenum(setno);
    md_dev64_t tmpdev = column->un_alt_dev;

    /* correct locks */
    ASSERT(UNIT_WRITER_HELD(un));
    /* not already writing to */
    ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT));
    /* not already open */
    ASSERT(! (column->un_devflags & MD_RAID_ALT_ISOPEN));

    if (tmpdev != NODEV64) {
        /*
         * Open by device id. We use orig_key since alt_dev
         * has been set by the caller to be the same as orig_dev.
         */
        if ((md_getmajor(tmpdev) != md_major) &&
            md_devid_found(setno, side, column->un_orig_key) == 1) {
            tmpdev = md_resolve_bydevid(MD_SID(un), tmpdev,
                column->un_orig_key);
        }
        if (md_layered_open(MD_SID(un), &tmpdev, MD_OFLG_NULL)) {
            /* open failed */
            column->un_alt_dev = tmpdev;
            return (1);
        } else {
            /* open succeeded */
            column->un_alt_dev = tmpdev;
            column->un_devflags |= MD_RAID_ALT_ISOPEN;
            return (0);
        }
    } else
        /* no alt device to open */
        return (1);
}

/*
 * NAME:	raid_close_alt
 *
 * DESCRIPTION: Closes the alt device used during resync.
 *
 * PARAMETERS:	mr_unit_t *un - raid unit structure
 *		int index - raid column
 *
 * RETURN:	none
 *
 * LOCKS:	requires unit writer lock
 */

static void
raid_close_alt(mr_unit_t *un, int index)
{
    mr_column_t *column = &un->un_column[index];
    md_dev64_t tmpdev = column->un_alt_dev;

    ASSERT(UNIT_WRITER_HELD(un));	/* correct locks */
    ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT)); /* not writing */
    ASSERT(column->un_devflags & MD_RAID_ALT_ISOPEN); /* already open */
    ASSERT(tmpdev != NODEV64);	/* is a device */

    md_layered_close(column->un_alt_dev, MD_OFLG_NULL);
    column->un_devflags &= ~MD_RAID_ALT_ISOPEN;
    column->un_alt_dev = NODEV64;
}

static diskaddr_t
raid_resync_fillin_cs(diskaddr_t line, uint_t line_count, md_raidcs_t *cs)
{
    mr_unit_t *un = cs->cs_un;

    ASSERT(line < un->un_segsincolumn);

    cs->cs_line = line;
    cs->cs_blkno = line * un->un_segsize;
    cs->cs_blkcnt = un->un_segsize * line_count;
    cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
    raid_line_reader_lock(cs, 1);

    return (line + line_count);
}
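
/*
 * Worked example of the fill-in above (illustrative numbers only): with
 * un_segsize = 128 blocks, line = 10, and line_count = 2, the child
 * struct covers cs_blkno = 1280 through cs_lastblk = 1535 (cs_blkcnt =
 * 256 blocks), and the value returned for the next line is 12.
 */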

/* states returned by raid_resync_region */

#define RAID_RESYNC_OKAY	0
#define RAID_RESYNC_RDERROR	2
#define RAID_RESYNC_WRERROR	3
#define RAID_RESYNC_STATE	4

int
raid_resync_region(
    md_raidcs_t *cs,
    diskaddr_t line,
    uint_t line_count,
    int *single_read,
    hs_cmds_t *hs_state,
    int *err_col,
    md_dev64_t dev_to_write,
    diskaddr_t write_dev_start)
{
    mr_unit_t *un = cs->cs_un;
    buf_t *readb1 = &cs->cs_pbuf;
    buf_t *readb2 = &cs->cs_dbuf;
    buf_t *writeb = &cs->cs_hbuf;
    diskaddr_t off;
    size_t tcopysize;
    size_t copysize;
    int resync;
    int quit = 0;
    size_t leftinseg;
    int i;

    resync = un->un_resync_index;
    off = line * un->un_segsize;
    copysize = un->un_resync_copysize;

    /* walk the region a chunk at a time; reads skip the resync column */

    leftinseg = un->un_segsize * line_count;
    while (leftinseg) {

        /* truncate the last chunk to what is left if needed */
        if (copysize > leftinseg)
            tcopysize = leftinseg;
        else
            tcopysize = copysize;
        leftinseg -= tcopysize;

        /*
         * One of two scenarios:
         * 1) resync device with hotspare ok. This implies that
         * we are copying from a good hotspare to a new good original
         * device. In this case readb1 is used as the buf for
         * the read from the hotspare device.
         * 2) For all other cases, including when in case 1) and an
         * error is detected on the (formerly good) hotspare device,
         * readb1 is used for the initial read. readb2 is used for
         * all other reads. Each readb2 buffer is xor'd into the
         * readb1 buffer.
         *
         * In both cases, writeb is used for the write, using readb1's
         * buffer.
         *
         * For case 2, we could alternatively perform the read for all
         * devices concurrently to improve performance. However,
         * this could diminish performance for concurrent reads and
         * writes if low on memory.
         */

        /* read first buffer */

        /* read from the resync column's hotspare if doing a copy resync */
        if (*single_read) {
            if (un->un_column[resync].un_dev == NODEV64)
                return (RAID_RESYNC_RDERROR);

            reset_buf(readb1, B_READ | B_BUSY,
                dbtob(copysize));
            readb1->b_bcount = dbtob(tcopysize);
            readb1->b_un.b_addr = cs->cs_pbuffer;
            readb1->b_edev = md_dev64_to_dev(
                un->un_column[resync].un_dev);
            readb1->b_lblkno =
                un->un_column[resync].un_devstart + off;
            (void) md_call_strategy(readb1, MD_STR_NOTTOP, NULL);
            if (biowait(readb1)) {
                /*
                 * at this point just start rebuilding the
                 * data and go on since the other columns
                 * are ok.
                 */
                *single_read = 0;
                *hs_state = HS_BAD;
                un->un_column[resync].un_devflags &=
                    ~MD_RAID_COPY_RESYNC;
                un->un_column[resync].un_devflags |=
                    MD_RAID_REGEN_RESYNC;
            }
        }

        /* if reading from all non-resync columns */
        if (!*single_read) {
            /* for each column, read line and xor into write buf */
            bzero(cs->cs_pbuffer, dbtob(tcopysize));
            for (i = 0; i < un->un_totalcolumncnt; i++) {

                if (un->un_column[i].un_dev == NODEV64)
                    return (RAID_RESYNC_RDERROR);

                /* skip column getting resync'ed */
                if (i == resync) {
                    continue;
                }
                reset_buf(readb1, B_READ | B_BUSY,
                    dbtob(copysize));
                readb1->b_bcount = dbtob(tcopysize);
                readb1->b_un.b_addr = cs->cs_dbuffer;
                readb1->b_edev = md_dev64_to_dev(
                    un->un_column[i].un_dev);
                readb1->b_lblkno =
                    un->un_column[i].un_devstart + off;

                (void) md_call_strategy(readb1, MD_STR_NOTTOP,
                    NULL);
                if (biowait(readb1)) {
                    *err_col = i;
                    quit = RAID_RESYNC_RDERROR;
                }

                if (quit)
                    return (quit);

                /* xor this column's data into the write buf */
                xor(cs->cs_pbuffer, readb1->b_un.b_addr,
                    dbtob(tcopysize));
            } /* for */
        }

        reset_buf(writeb, B_WRITE | B_BUSY,
            dbtob(copysize));
        writeb->b_bcount = dbtob(tcopysize);
        writeb->b_un.b_addr = cs->cs_pbuffer;
        writeb->b_lblkno = off + write_dev_start;
        writeb->b_edev = md_dev64_to_dev(dev_to_write);

        /* set write block number and perform the write */
        (void) md_call_strategy(writeb, MD_STR_NOTTOP, NULL);
        if (biowait(writeb)) {
            if (*single_read == 0) {
                *hs_state = HS_BAD;
            }
            return (RAID_RESYNC_WRERROR);
        }
        writeb->b_blkno += tcopysize;
        off += tcopysize;
    } /* while */

    sema_destroy(&readb1->b_io);
    sema_destroy(&readb1->b_sem);
    sema_destroy(&readb2->b_io);
    sema_destroy(&readb2->b_sem);
    sema_destroy(&writeb->b_io);
    sema_destroy(&writeb->b_sem);
    return (RAID_RESYNC_OKAY);
}

/*
 * NAME:	resync_comp
 *
 * DESCRIPTION: Resync the component. Iterate through the raid unit a line at
 *		a time, read from the good device(s) and write the resync
 *		device.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		md_raidcs_t *cs - child save struct
 *
 * RETURN:	none; the outcome is recorded in the unit and column state.
 *
 * LOCKS:	Expects Unit Reader Lock to be held across call. Acquires and
 *		releases Line Reader Lock for per-line I/O.
 */
static void
resync_comp(
    minor_t mnum,
    md_raidcs_t *cs
)
{
    mdi_unit_t *ui;
    mr_unit_t *un;
    mddb_recid_t recids[2];
    rcs_state_t state;
    md_dev64_t dev_to_write;
    diskaddr_t write_pwstart;
    diskaddr_t write_devstart;
    md_dev64_t dev;
    int resync;
    int i;
    int single_read = 0;
    int err;
    int err_cnt;
    int last_err;
    diskaddr_t line;
    diskaddr_t segsincolumn;
    size_t bsize;
    uint_t line_count;

    /*
     * hs_state is the state of the hotspare on the column being resynced;
     * state records the resync target column's devstate on entry
     */
    hs_cmds_t hs_state;
    int err_col = -1;
    diskaddr_t resync_end_pos;

    ui = MDI_UNIT(mnum);
    ASSERT(ui != NULL);

    un = cs->cs_un;

    md_unit_readerexit(ui);
    un = (mr_unit_t *)md_io_writerlock(ui);
    un = (mr_unit_t *)md_unit_writerlock(ui);
    resync = un->un_resync_index;
    state = un->un_column[resync].un_devstate;
    line_count = un->un_maxio / un->un_segsize;
    if (line_count == 0) { /* handle the case of segsize > maxio */
        line_count = 1;
        bsize = un->un_maxio;
    } else
        bsize = line_count * un->un_segsize;
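
    /*
     * Sizing example (illustrative values only): with un_maxio = 1024
     * blocks and un_segsize = 128 blocks, line_count = 8 and each pass
     * covers bsize = 1024 blocks; when un_segsize exceeds un_maxio,
     * line_count is forced to 1 and bsize is capped at un_maxio.
     */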

    un->un_resync_copysize = (uint_t)bsize;

    ASSERT(un->c.un_status & MD_UN_RESYNC_ACTIVE);
    ASSERT(un->un_column[resync].un_devflags &
        (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

    /*
     * if the column is not in resync then just bail out.
     */
    if (! (un->un_column[resync].un_devstate & RCS_RESYNC)) {
        md_unit_writerexit(ui);
        md_io_writerexit(ui);
        un = (mr_unit_t *)md_unit_readerlock(ui);
        return;
    }
    SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, SVM_TAG_METADEVICE,
        MD_UN2SET(un), MD_SID(un));

    /* identify device to write and its start block */

    if (un->un_column[resync].un_alt_dev != NODEV64) {
        if (raid_open_alt(un, resync)) {
            raid_set_state(un, resync, state, 0);
            md_unit_writerexit(ui);
            md_io_writerexit(ui);
            un = (mr_unit_t *)md_unit_readerlock(ui);
            cmn_err(CE_WARN, "md: %s: %s open failed replace "
                "terminated", md_shortname(MD_SID(un)),
                md_devname(MD_UN2SET(un),
                un->un_column[resync].un_alt_dev,
                NULL, 0));
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
                SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
            return;
        }
        ASSERT(un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC);
        dev_to_write = un->un_column[resync].un_alt_dev;
        write_devstart = un->un_column[resync].un_alt_devstart;
        write_pwstart = un->un_column[resync].un_alt_pwstart;
        if (un->un_column[resync].un_devflags & MD_RAID_DEV_ERRED) {
            single_read = 0;
            hs_state = HS_BAD;
        } else {
            hs_state = HS_FREE;
            single_read = 1;
        }
        un->un_column[resync].un_devflags |= MD_RAID_WRITE_ALT;
    } else {
        dev_to_write = un->un_column[resync].un_dev;
        write_devstart = un->un_column[resync].un_devstart;
        write_pwstart = un->un_column[resync].un_pwstart;
        single_read = 0;
        hs_state = HS_FREE;
        ASSERT(un->un_column[resync].un_devflags &
            MD_RAID_REGEN_RESYNC);
    }
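
    /*
     * Note on the two flavors selected above (descriptive only):
     * MD_RAID_COPY_RESYNC copies data from a good hotspare back onto
     * the replaced original device (the single_read path), while
     * MD_RAID_REGEN_RESYNC rebuilds the column from the XOR of all
     * the surviving columns.
     */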

    alloc_bufs(cs, dbtob(bsize));
    /* initialize pre-write area */
    if (init_pw_area(un, dev_to_write, write_pwstart, resync)) {
        un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
        if (un->un_column[resync].un_alt_dev != NODEV64) {
            raid_close_alt(un, resync);
        }
        md_unit_writerexit(ui);
        md_io_writerexit(ui);
        if (dev_to_write == un->un_column[resync].un_dev)
            hs_state = HS_BAD;
        err = RAID_RESYNC_WRERROR;
        goto resync_comp_error;
    }

    un->c.un_status &= ~MD_UN_RESYNC_CANCEL;
    segsincolumn = un->un_segsincolumn;
    err_cnt = raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED);

    /* commit the record */

    md_unit_writerexit(ui);
    md_io_writerexit(ui);

    /* resync each line of the unit */
    for (line = 0; line < segsincolumn; line += line_count) {
        /*
         * Update address range in child struct and lock the line.
         *
         * The reader version of the line lock is used since only
         * resync will use data beyond un_resync_line_index on the
         * resync device.
         */
        un = (mr_unit_t *)md_io_readerlock(ui);
        if (line + line_count > segsincolumn)
            line_count = segsincolumn - line;
        resync_end_pos = raid_resync_fillin_cs(line, line_count, cs);
        (void) md_unit_readerlock(ui);
        ASSERT(un->un_resync_line_index == resync_end_pos);
        err = raid_resync_region(cs, line, (int)line_count,
            &single_read, &hs_state, &err_col, dev_to_write,
            write_devstart);

        /*
         * if the column failed to resync then stop writing directly
         * to the column.
         */
        if (err)
            un->un_resync_line_index = 0;

        md_unit_readerexit(ui);
        raid_line_exit(cs);
        md_io_readerexit(ui);

        if (err)
            break;

        un = (mr_unit_t *)md_unit_writerlock(ui);

        if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) != err_cnt) {
            err = RAID_RESYNC_STATE;
            md_unit_writerexit(ui);
            break;
        }
        md_unit_writerexit(ui);
    } /* for */

resync_comp_error:
    un = (mr_unit_t *)md_io_writerlock(ui);
    (void) md_unit_writerlock(ui);
    un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;

    recids[0] = 0;
    recids[1] = 0;
    switch (err) {
    /*
     * successful resync
     */
    case RAID_RESYNC_OKAY:
        if ((un->un_column[resync].un_orig_dev != NODEV64) &&
            (un->un_column[resync].un_orig_dev ==
            un->un_column[resync].un_alt_dev)) {
            /*
             * replacing a hot spare
             * release the hot spare, which will close the hotspare
             * and mark it closed.
             */
            raid_hs_release(hs_state, un, &recids[0], resync);
            /*
             * make the resync target the main device and
             * mark open
             */
            un->un_column[resync].un_hs_id = 0;
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
            un->un_column[resync].un_devflags |= MD_RAID_DEV_ISOPEN;
            /* alt becomes the device so don't close it */
            un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
            un->un_column[resync].un_devflags &=
                ~MD_RAID_ALT_ISOPEN;
            un->un_column[resync].un_alt_dev = NODEV64;
        }
        raid_set_state(un, resync, RCS_OKAY, 0);
        break;

    case RAID_RESYNC_WRERROR:
        if (HOTSPARED(un, resync) && single_read &&
            (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
            /*
             * this is the case where the resync target is
             * bad but there is a good hotspare. In this
             * case keep the hotspare, and go back to okay.
             */
            raid_set_state(un, resync, RCS_OKAY, 0);
            cmn_err(CE_WARN, "md: %s: %s write error, replace "
                "terminated", md_shortname(MD_SID(un)),
                md_devname(MD_UN2SET(un),
                un->un_column[resync].un_orig_dev,
                NULL, 0));
            break;
        }
        if (HOTSPARED(un, resync)) {
            raid_hs_release(hs_state, un, &recids[0], resync);
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
        }
        raid_set_state(un, resync, RCS_ERRED, 0);
        if (un->un_column[resync].un_devflags & MD_RAID_REGEN_RESYNC)
            dev = un->un_column[resync].un_dev;
        else
            dev = un->un_column[resync].un_alt_dev;
        cmn_err(CE_WARN, "md: %s: %s write error, replace terminated",
            md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), dev,
            NULL, 0));
        break;

    case RAID_RESYNC_STATE:
        if (HOTSPARED(un, resync) && single_read &&
            (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
            /*
             * this is the case where the resync target is
             * bad but there is a good hotspare. In this
             * case keep the hotspare, and go back to okay.
             */
            raid_set_state(un, resync, RCS_OKAY, 0);
            cmn_err(CE_WARN, "md: %s: needs maintenance, replace "
                "terminated", md_shortname(MD_SID(un)));
            break;
        }
        if (HOTSPARED(un, resync)) {
            raid_hs_release(hs_state, un, &recids[0], resync);
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
        }
        break;
    case RAID_RESYNC_RDERROR:
        if (HOTSPARED(un, resync)) {
            raid_hs_release(hs_state, un, &recids[0], resync);
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
        }

        if ((resync != err_col) && (err_col != NOCOLUMN))
            raid_set_state(un, err_col, RCS_ERRED, 0);
        break;

    default:
        ASSERT(0);
    }
    if (un->un_column[resync].un_alt_dev != NODEV64) {
        raid_close_alt(un, resync);
    }

    /*
     * An I/O operation may have gotten an error and placed a column in
     * erred state. That aborts the resync, which will end up in last
     * erred. This is ugly, so go through the columns and do cleanup.
     */
    err_cnt = 0;
    last_err = 0;
    for (i = 0; i < un->un_totalcolumncnt; i++) {
        if (un->un_column[i].un_devstate & RCS_OKAY)
            continue;
        if (i == resync) {
            raid_set_state(un, i, RCS_ERRED, 1);
            err_cnt++;
        } else if (err == RAID_RESYNC_OKAY) {
            err_cnt++;
        } else {
            raid_set_state(un, i, RCS_LAST_ERRED, 1);
            last_err++;
        }
    }
    if ((err_cnt == 0) && (last_err == 0))
        un->un_state = RUS_OKAY;
    else if (last_err == 0) {
        un->un_state = RUS_ERRED;
        ASSERT(err_cnt == 1);
    } else if (last_err > 0) {
        un->un_state = RUS_LAST_ERRED;
    }

    uniqtime32(&un->un_column[resync].un_devtimestamp);
    un->un_resync_copysize = 0;
    un->un_column[resync].un_devflags &=
        ~(MD_RAID_REGEN_RESYNC | MD_RAID_COPY_RESYNC);
    raid_commit(un, recids);
    /* release unit writer lock and acquire unit reader lock */
    md_unit_writerexit(ui);
    md_io_writerexit(ui);
    (void) md_unit_readerlock(ui);
    if (err == RAID_RESYNC_OKAY) {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
    } else {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
        if (raid_state_cnt(un, RCS_ERRED |
            RCS_LAST_ERRED) > 1) {
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
                SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
        } else {
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
                SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
        }
    }

    free_bufs(dbtob(bsize), cs);
}

/*
 * NAME:	resync_unit
 *
 * DESCRIPTION: Start of RAID resync thread. Perform up front allocations,
 *		initializations and consistency checking, then call
 *		resync_comp to resync the component.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *
 * LOCKS:	Acquires and releases Unit Reader Lock to maintain unit
 *		existence during resync.
 *		Acquires and releases the resync count lock for cpr.
 */
static void
resync_unit(
    minor_t mnum
)
{
    mdi_unit_t *ui;
    mr_unit_t *un;
    md_raidps_t *ps = NULL;
    md_raidcs_t *cs = NULL;
    int resync;

    /*
     * Increment the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync++;
    mutex_exit(&md_cpr_resync.md_resync_mutex);

    ui = MDI_UNIT(mnum);
    ASSERT(ui != NULL);

    un = (mr_unit_t *)md_unit_readerlock(ui);

    /*
     * Allocate parent and child memory pool structures. These are
     * only needed to lock raid lines, so only the minimal
     * required fields for this purpose are initialized.
     *
     * Do not use the reserve pool for resync.
     */
    ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS);
    raid_parent_init(ps);
    cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS);
    raid_child_init(cs);
    resync = un->un_resync_index;
    ps->ps_un = un;
    ps->ps_ui = ui;
    ps->ps_flags = MD_RPS_INUSE;
    cs->cs_ps = ps;
    cs->cs_un = un;

    ASSERT(!(un->un_column[resync].un_devflags & MD_RAID_WRITE_ALT));

    resync_comp(mnum, cs);
    release_resync_request(mnum);

    kmem_cache_free(raid_child_cache, cs);
    kmem_cache_free(raid_parent_cache, ps);

    md_unit_readerexit(ui);

    /* close raid unit */
    (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);

    /* poke hot spare daemon */
    (void) raid_hotspares();

    /*
     * Decrement the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync--;
    mutex_exit(&md_cpr_resync.md_resync_mutex);

    thread_exit();
}

/*
 * NAME:	raid_resync_unit
 *
 * DESCRIPTION: RAID metadevice specific resync routine.
 *		Open the unit and start resync_unit as a separate thread.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		md_error_t *ep - output error parameter
 *
 * RETURN:	On error return 1 or set ep to nonzero, otherwise return 0.
 *
 * LOCKS:	Acquires and releases Unit Writer Lock.
 */
int
raid_resync_unit(
    minor_t mnum,
    md_error_t *ep
)
{
    mdi_unit_t *ui;
    set_t setno = MD_MIN2SET(mnum);
    mr_unit_t *un;

    ui = MDI_UNIT(mnum);
    un = MD_UNIT(mnum);

    if (md_get_setstatus(setno) & MD_SET_STALE)
        return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

    ASSERT(un->un_column[un->un_resync_index].un_devflags &
        (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

    /* Don't start a resync if the device is not available */
    if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    if (raid_internal_open(mnum, FREAD | FWRITE, OTYP_LYR, 0)) {
        (void) md_unit_writerlock(ui);
        release_resync_request(mnum);
        md_unit_writerexit(ui);
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
            setno, MD_SID(un));
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    /* start resync_unit thread */
    (void) thread_create(NULL, 0, resync_unit, (void *)(uintptr_t)mnum,
        0, &p0, TS_RUN, minclsyspri);

    return (0);
}