xref: /freebsd/sys/geom/raid/md_intel.c (revision 6ef6ba9950260f42b47499d17874d00ca9290955)
1 /*-
2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #include <sys/bio.h>
33 #include <sys/endian.h>
34 #include <sys/kernel.h>
35 #include <sys/kobj.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/systm.h>
41 #include <sys/taskqueue.h>
42 #include <geom/geom.h>
43 #include "geom/raid/g_raid.h"
44 #include "g_raid_md_if.h"
45 
46 static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
47 
/*
 * One map of a volume, as laid out in the metadata.  The structure is
 * variable length: disk_idx[] really holds total_disks entries and whatever
 * comes next in the metadata starts right behind the last of them (see
 * intel_get_map() and intel_get_volume()).
 */
struct intel_raid_map {
	/* Low 32 bits of the 64-bit values; the high halves are the *_hi fields. */
	uint32_t	offset;
	uint32_t	disk_sectors;
	uint32_t	stripe_count;
	uint16_t	strip_sectors;
	uint8_t		status;
#define INTEL_S_READY           0x00
#define INTEL_S_UNINITIALIZED   0x01
#define INTEL_S_DEGRADED        0x02
#define INTEL_S_FAILURE         0x03

	uint8_t		type;
#define INTEL_T_RAID0           0x00
#define INTEL_T_RAID1           0x01
#define INTEL_T_RAID5           0x05

	/* Number of valid entries in disk_idx[]. */
	uint8_t		total_disks;
	uint8_t		total_domains;
	uint8_t		failed_disk_num;
	uint8_t		ddf;
	/* High 32 bits of offset, disk_sectors and stripe_count. */
	uint32_t	offset_hi;
	uint32_t	disk_sectors_hi;
	uint32_t	stripe_count_hi;
	uint32_t	filler_2[4];
	/*
	 * Each entry is a disk index (INTEL_DI_IDX bits) optionally combined
	 * with the INTEL_DI_RBLD flag.
	 */
	uint32_t	disk_idx[1];	/* total_disks entries. */
#define INTEL_DI_IDX	0x00ffffff
#define INTEL_DI_RBLD	0x01000000
} __packed;
76 
/*
 * Per-volume record in the metadata.  The record is variable length: map[]
 * holds one map normally and two while migr_state is non-zero, and each map
 * is itself variable length (see intel_get_map()).
 */
struct intel_raid_vol {
	uint8_t		name[16];
	u_int64_t	total_sectors __packed;
	uint32_t	state;
#define INTEL_ST_BOOTABLE		0x00000001
#define INTEL_ST_BOOT_DEVICE		0x00000002
#define INTEL_ST_READ_COALESCING	0x00000004
#define INTEL_ST_WRITE_COALESCING	0x00000008
#define INTEL_ST_LAST_SHUTDOWN_DIRTY	0x00000010
#define INTEL_ST_HIDDEN_AT_BOOT		0x00000020
#define INTEL_ST_CURRENTLY_HIDDEN	0x00000040
#define INTEL_ST_VERIFY_AND_FIX		0x00000080
#define INTEL_ST_MAP_STATE_UNINIT	0x00000100
#define INTEL_ST_NO_AUTO_RECOVERY	0x00000200
#define INTEL_ST_CLONE_N_GO		0x00000400
#define INTEL_ST_CLONE_MAN_SYNC		0x00000800
#define INTEL_ST_CNG_MASTER_DISK_NUM	0x00001000
	uint32_t	reserved;
	uint8_t		migr_priority;
	uint8_t		num_sub_vols;
	uint8_t		tid;
	uint8_t		cng_master_disk;
	uint16_t	cache_policy;
	uint8_t		cng_state;
#define INTEL_CNGST_UPDATED		0
#define INTEL_CNGST_NEEDS_UPDATE	1
#define INTEL_CNGST_MASTER_MISSING	2
	uint8_t		cng_sub_state;
	uint32_t	filler_0[10];

	/* Low 32 bits; the high half is curr_migr_unit_hi. */
	uint32_t	curr_migr_unit;
	uint32_t	checkpoint_id;
	/* Non-zero while a second map is present in map[]. */
	uint8_t		migr_state;
	uint8_t		migr_type;
#define INTEL_MT_INIT		0
#define INTEL_MT_REBUILD	1
#define INTEL_MT_VERIFY		2
#define INTEL_MT_GEN_MIGR	3
#define INTEL_MT_STATE_CHANGE	4
#define INTEL_MT_REPAIR		5
	uint8_t		dirty;
	uint8_t		fs_state;
	uint16_t	verify_errors;
	uint16_t	bad_blocks;
	uint32_t	curr_migr_unit_hi;
	uint32_t	filler_1[3];
	struct intel_raid_map map[1];	/* 2 entries if migr_state != 0. */
} __packed;
125 
/*
 * Per-disk record in the metadata.  Disks are looked up by their serial
 * (see intel_meta_find_disk()).
 */
struct intel_raid_disk {
#define INTEL_SERIAL_LEN	16
	uint8_t		serial[INTEL_SERIAL_LEN];
	/* Low 32 bits of the sector count; the high half is sectors_hi. */
	uint32_t	sectors;
	uint32_t	id;
	uint32_t	flags;
#define INTEL_F_SPARE		0x01
#define INTEL_F_ASSIGNED	0x02
#define INTEL_F_FAILED		0x04
#define INTEL_F_ONLINE		0x08
#define INTEL_F_DISABLED	0x80
	uint32_t	owner_cfg_num;
	uint32_t	sectors_hi;
	uint32_t	filler[3];
} __packed;
141 
/*
 * Metadata header.  It is followed in memory by total_disks disk records
 * (the first of which is the disk[] member) and then by the volume records,
 * which intel_get_volume() locates.
 */
struct intel_raid_conf {
	/* Signature; compared against INTEL_MAGIC by intel_meta_read(). */
	uint8_t		intel_id[24];
#define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "

	/* Six version characters, not NUL-terminated. */
	uint8_t		version[6];
#define INTEL_VERSION_1000	"1.0.00"	/* RAID0 */
#define INTEL_VERSION_1100	"1.1.00"	/* RAID1 */
#define INTEL_VERSION_1200	"1.2.00"	/* Many volumes */
#define INTEL_VERSION_1201	"1.2.01"	/* 3 or 4 disks */
#define INTEL_VERSION_1202	"1.2.02"	/* RAID5 */
#define INTEL_VERSION_1204	"1.2.04"	/* 5 or 6 disks */
#define INTEL_VERSION_1206	"1.2.06"	/* CNG */
#define INTEL_VERSION_1300	"1.3.00"	/* Attributes */

	uint8_t		dummy_0[2];
	/*
	 * Sum of all 32-bit words in the first config_size bytes, computed
	 * with this field taken as zero (see intel_meta_write()).
	 */
	uint32_t	checksum;
	/* Total metadata size in bytes. */
	uint32_t	config_size;
	uint32_t	config_id;
	uint32_t	generation;
	uint32_t	error_log_size;
	uint32_t	attributes;
#define INTEL_ATTR_RAID0	0x00000001
#define INTEL_ATTR_RAID1	0x00000002
#define INTEL_ATTR_RAID10	0x00000004
#define INTEL_ATTR_RAID1E	0x00000008
#define INTEL_ATTR_RAID5	0x00000010
#define INTEL_ATTR_RAIDCNG	0x00000020
#define INTEL_ATTR_EXT_STRIP	0x00000040
#define INTEL_ATTR_NVM_CACHE	0x02000000
#define INTEL_ATTR_2TB_DISK	0x04000000
#define INTEL_ATTR_BBM		0x08000000
#define INTEL_ATTR_NVM_CACHE2	0x10000000
#define INTEL_ATTR_2TB		0x20000000
#define INTEL_ATTR_PM		0x40000000
#define INTEL_ATTR_CHECKSUM	0x80000000

	/* Number of entries in disk[]. */
	uint8_t		total_disks;
	/* Number of volume records following disk[]. */
	uint8_t		total_volumes;
	uint8_t		error_log_pos;
	uint8_t		dummy_2[1];
	uint32_t	cache_size;
	uint32_t	orig_config_id;
	uint32_t	pwr_cycle_count;
	uint32_t	bbm_log_size;
	uint32_t	filler_0[35];
	struct intel_raid_disk	disk[1];	/* total_disks entries. */
	/* Here goes total_volumes of struct intel_raid_vol. */
} __packed;
190 
/*
 * Attribute bits this driver accepts.  intel_meta_read() rejects version
 * 1.3.00 metadata whose attributes field has any bit outside this mask.
 */
#define INTEL_ATTR_SUPPORTED	( INTEL_ATTR_RAID0 | INTEL_ATTR_RAID1 |	\
    INTEL_ATTR_RAID10 | INTEL_ATTR_RAID1E | INTEL_ATTR_RAID5 |		\
    INTEL_ATTR_RAIDCNG | INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK |	\
    INTEL_ATTR_2TB | INTEL_ATTR_PM | INTEL_ATTR_CHECKSUM )
195 
/*
 * Size in bytes of metadata holding ndisks disk records and two volumes with
 * two maps each, every map carrying ndisks disk_idx entries.  Each structure
 * already embeds one element of its trailing array, hence the "- 1" terms.
 * The argument is parenthesized so that expression arguments group correctly.
 */
#define INTEL_MAX_MD_SIZE(ndisks)				\
    (sizeof(struct intel_raid_conf) +				\
     sizeof(struct intel_raid_disk) * ((ndisks) - 1) +		\
     sizeof(struct intel_raid_vol) * 2 +			\
     sizeof(struct intel_raid_map) * 2 +			\
     sizeof(uint32_t) * ((ndisks) - 1) * 4)
202 
/* Intel-specific per-disk data, reached through g_raid_disk.d_md_data. */
struct g_raid_md_intel_perdisk {
	/* NOTE(review): presumably the metadata read from this disk -- confirm. */
	struct intel_raid_conf	*pd_meta;
	/*
	 * Position of the disk in the metadata disk[] array;
	 * g_raid_md_intel_start_disk() stores -2 here for a disk that has
	 * been replaced.
	 */
	int			 pd_disk_pos;
	/* This disk's own disk record (serial, size, flags). */
	struct intel_raid_disk	 pd_disk_meta;
};
208 
/* Intel-specific per-volume data, reached through g_raid_volume.v_md_data. */
struct g_raid_md_intel_pervolume {
	/* Index of the volume in the metadata, as taken by intel_get_volume(). */
	int			 pv_volume_pos;
	/* NOTE(review): the pv_cng* fields presumably mirror the CNG volume
	 * state/fields of struct intel_raid_vol -- confirm against the users. */
	int			 pv_cng;
	int			 pv_cng_man_sync;
	int			 pv_cng_master_disk;
};
215 
/*
 * Intel metadata object.  The generic g_raid_md_object is the first member,
 * so a pointer to it can be cast to this type (as done in
 * g_raid_md_intel_start_disk()).
 */
struct g_raid_md_intel_object {
	struct g_raid_md_object	 mdio_base;
	uint32_t		 mdio_config_id;
	uint32_t		 mdio_orig_config_id;
	uint32_t		 mdio_generation;
	/* Metadata consulted when disks are started. */
	struct intel_raid_conf	*mdio_meta;
	struct callout		 mdio_start_co;	/* STARTING state timer. */
	int			 mdio_disks_present;
	/* Non-zero once started; new disks may then replace missing ones. */
	int			 mdio_started;
	int			 mdio_incomplete;
	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
};
228 
/*
 * Forward declarations of the metadata method implementations that are
 * registered in g_raid_md_intel_methods[].
 */
static g_raid_md_create_t g_raid_md_create_intel;
static g_raid_md_taste_t g_raid_md_taste_intel;
static g_raid_md_event_t g_raid_md_event_intel;
static g_raid_md_ctl_t g_raid_md_ctl_intel;
static g_raid_md_write_t g_raid_md_write_intel;
static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
static g_raid_md_free_volume_t g_raid_md_free_volume_intel;
static g_raid_md_free_t g_raid_md_free_intel;
238 
/*
 * kobj method table binding each g_raid_md interface method to its Intel
 * implementation.  The { 0, 0 } entry terminates the table.
 */
static kobj_method_t g_raid_md_intel_methods[] = {
	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_intel),
	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_intel),
	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_intel),
	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_intel),
	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_intel),
	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_intel),
	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_intel),
	KOBJMETHOD(g_raid_md_free_volume,	g_raid_md_free_volume_intel),
	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_intel),
	{ 0, 0 }
};
251 
/*
 * Class descriptor for the "Intel" metadata format: its name, the method
 * table above and the size of the per-instance object, followed by the
 * enable flag and priority.
 */
static struct g_raid_md_class g_raid_md_intel_class = {
	"Intel",
	g_raid_md_intel_methods,
	sizeof(struct g_raid_md_intel_object),
	.mdc_enable = 1,
	.mdc_priority = 100
};
259 
260 
261 static struct intel_raid_map *
262 intel_get_map(struct intel_raid_vol *mvol, int i)
263 {
264 	struct intel_raid_map *mmap;
265 
266 	if (i > (mvol->migr_state ? 1 : 0))
267 		return (NULL);
268 	mmap = &mvol->map[0];
269 	for (; i > 0; i--) {
270 		mmap = (struct intel_raid_map *)
271 		    &mmap->disk_idx[mmap->total_disks];
272 	}
273 	return ((struct intel_raid_map *)mmap);
274 }
275 
276 static struct intel_raid_vol *
277 intel_get_volume(struct intel_raid_conf *meta, int i)
278 {
279 	struct intel_raid_vol *mvol;
280 	struct intel_raid_map *mmap;
281 
282 	if (i > 1)
283 		return (NULL);
284 	mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
285 	for (; i > 0; i--) {
286 		mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
287 		mvol = (struct intel_raid_vol *)
288 		    &mmap->disk_idx[mmap->total_disks];
289 	}
290 	return (mvol);
291 }
292 
293 static off_t
294 intel_get_map_offset(struct intel_raid_map *mmap)
295 {
296 	off_t offset = (off_t)mmap->offset_hi << 32;
297 
298 	offset += mmap->offset;
299 	return (offset);
300 }
301 
302 static void
303 intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
304 {
305 
306 	mmap->offset = offset & 0xffffffff;
307 	mmap->offset_hi = offset >> 32;
308 }
309 
310 static off_t
311 intel_get_map_disk_sectors(struct intel_raid_map *mmap)
312 {
313 	off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
314 
315 	disk_sectors += mmap->disk_sectors;
316 	return (disk_sectors);
317 }
318 
319 static void
320 intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
321 {
322 
323 	mmap->disk_sectors = disk_sectors & 0xffffffff;
324 	mmap->disk_sectors_hi = disk_sectors >> 32;
325 }
326 
327 static void
328 intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
329 {
330 
331 	mmap->stripe_count = stripe_count & 0xffffffff;
332 	mmap->stripe_count_hi = stripe_count >> 32;
333 }
334 
335 static off_t
336 intel_get_disk_sectors(struct intel_raid_disk *disk)
337 {
338 	off_t sectors = (off_t)disk->sectors_hi << 32;
339 
340 	sectors += disk->sectors;
341 	return (sectors);
342 }
343 
344 static void
345 intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
346 {
347 
348 	disk->sectors = sectors & 0xffffffff;
349 	disk->sectors_hi = sectors >> 32;
350 }
351 
352 static off_t
353 intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
354 {
355 	off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
356 
357 	curr_migr_unit += vol->curr_migr_unit;
358 	return (curr_migr_unit);
359 }
360 
361 static void
362 intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
363 {
364 
365 	vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
366 	vol->curr_migr_unit_hi = curr_migr_unit >> 32;
367 }
368 
369 static void
370 g_raid_md_intel_print(struct intel_raid_conf *meta)
371 {
372 	struct intel_raid_vol *mvol;
373 	struct intel_raid_map *mmap;
374 	int i, j, k;
375 
376 	if (g_raid_debug < 1)
377 		return;
378 
379 	printf("********* ATA Intel MatrixRAID Metadata *********\n");
380 	printf("intel_id            <%.24s>\n", meta->intel_id);
381 	printf("version             <%.6s>\n", meta->version);
382 	printf("checksum            0x%08x\n", meta->checksum);
383 	printf("config_size         0x%08x\n", meta->config_size);
384 	printf("config_id           0x%08x\n", meta->config_id);
385 	printf("generation          0x%08x\n", meta->generation);
386 	printf("error_log_size      %d\n", meta->error_log_size);
387 	printf("attributes          0x%08x\n", meta->attributes);
388 	printf("total_disks         %u\n", meta->total_disks);
389 	printf("total_volumes       %u\n", meta->total_volumes);
390 	printf("error_log_pos       %u\n", meta->error_log_pos);
391 	printf("cache_size          %u\n", meta->cache_size);
392 	printf("orig_config_id      0x%08x\n", meta->orig_config_id);
393 	printf("pwr_cycle_count     %u\n", meta->pwr_cycle_count);
394 	printf("bbm_log_size        %u\n", meta->bbm_log_size);
395 	printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags owner\n");
396 	for (i = 0; i < meta->total_disks; i++ ) {
397 		printf("    %d   <%.16s> %u %u 0x%08x 0x%08x %08x\n", i,
398 		    meta->disk[i].serial, meta->disk[i].sectors,
399 		    meta->disk[i].sectors_hi, meta->disk[i].id,
400 		    meta->disk[i].flags, meta->disk[i].owner_cfg_num);
401 	}
402 	for (i = 0; i < meta->total_volumes; i++) {
403 		mvol = intel_get_volume(meta, i);
404 		printf(" ****** Volume %d ******\n", i);
405 		printf(" name               %.16s\n", mvol->name);
406 		printf(" total_sectors      %ju\n", mvol->total_sectors);
407 		printf(" state              0x%08x\n", mvol->state);
408 		printf(" reserved           %u\n", mvol->reserved);
409 		printf(" migr_priority      %u\n", mvol->migr_priority);
410 		printf(" num_sub_vols       %u\n", mvol->num_sub_vols);
411 		printf(" tid                %u\n", mvol->tid);
412 		printf(" cng_master_disk    %u\n", mvol->cng_master_disk);
413 		printf(" cache_policy       %u\n", mvol->cache_policy);
414 		printf(" cng_state          %u\n", mvol->cng_state);
415 		printf(" cng_sub_state      %u\n", mvol->cng_sub_state);
416 		printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
417 		printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
418 		printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
419 		printf(" migr_state         %u\n", mvol->migr_state);
420 		printf(" migr_type          %u\n", mvol->migr_type);
421 		printf(" dirty              %u\n", mvol->dirty);
422 		printf(" fs_state           %u\n", mvol->fs_state);
423 		printf(" verify_errors      %u\n", mvol->verify_errors);
424 		printf(" bad_blocks         %u\n", mvol->bad_blocks);
425 
426 		for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
427 			printf("  *** Map %d ***\n", j);
428 			mmap = intel_get_map(mvol, j);
429 			printf("  offset            %u\n", mmap->offset);
430 			printf("  offset_hi         %u\n", mmap->offset_hi);
431 			printf("  disk_sectors      %u\n", mmap->disk_sectors);
432 			printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
433 			printf("  stripe_count      %u\n", mmap->stripe_count);
434 			printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
435 			printf("  strip_sectors     %u\n", mmap->strip_sectors);
436 			printf("  status            %u\n", mmap->status);
437 			printf("  type              %u\n", mmap->type);
438 			printf("  total_disks       %u\n", mmap->total_disks);
439 			printf("  total_domains     %u\n", mmap->total_domains);
440 			printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
441 			printf("  ddf               %u\n", mmap->ddf);
442 			printf("  disk_idx         ");
443 			for (k = 0; k < mmap->total_disks; k++)
444 				printf(" 0x%08x", mmap->disk_idx[k]);
445 			printf("\n");
446 		}
447 	}
448 	printf("=================================================\n");
449 }
450 
451 static struct intel_raid_conf *
452 intel_meta_copy(struct intel_raid_conf *meta)
453 {
454 	struct intel_raid_conf *nmeta;
455 
456 	nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
457 	memcpy(nmeta, meta, meta->config_size);
458 	return (nmeta);
459 }
460 
461 static int
462 intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
463 {
464 	int pos;
465 
466 	for (pos = 0; pos < meta->total_disks; pos++) {
467 		if (strncmp(meta->disk[pos].serial,
468 		    serial, INTEL_SERIAL_LEN) == 0)
469 			return (pos);
470 	}
471 	return (-1);
472 }
473 
474 static struct intel_raid_conf *
475 intel_meta_read(struct g_consumer *cp)
476 {
477 	struct g_provider *pp;
478 	struct intel_raid_conf *meta;
479 	struct intel_raid_vol *mvol;
480 	struct intel_raid_map *mmap, *mmap1;
481 	char *buf;
482 	int error, i, j, k, left, size;
483 	uint32_t checksum, *ptr;
484 
485 	pp = cp->provider;
486 
487 	/* Read the anchor sector. */
488 	buf = g_read_data(cp,
489 	    pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
490 	if (buf == NULL) {
491 		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
492 		    pp->name, error);
493 		return (NULL);
494 	}
495 	meta = (struct intel_raid_conf *)buf;
496 
497 	/* Check if this is an Intel RAID struct */
498 	if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
499 		G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
500 		g_free(buf);
501 		return (NULL);
502 	}
503 	if (meta->config_size > 65536 ||
504 	    meta->config_size < sizeof(struct intel_raid_conf)) {
505 		G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
506 		    meta->config_size);
507 		g_free(buf);
508 		return (NULL);
509 	}
510 	size = meta->config_size;
511 	meta = malloc(size, M_MD_INTEL, M_WAITOK);
512 	memcpy(meta, buf, min(size, pp->sectorsize));
513 	g_free(buf);
514 
515 	/* Read all the rest, if needed. */
516 	if (meta->config_size > pp->sectorsize) {
517 		left = (meta->config_size - 1) / pp->sectorsize;
518 		buf = g_read_data(cp,
519 		    pp->mediasize - pp->sectorsize * (2 + left),
520 		    pp->sectorsize * left, &error);
521 		if (buf == NULL) {
522 			G_RAID_DEBUG(1, "Cannot read remaining metadata"
523 			    " part from %s (error=%d).",
524 			    pp->name, error);
525 			free(meta, M_MD_INTEL);
526 			return (NULL);
527 		}
528 		memcpy(((char *)meta) + pp->sectorsize, buf,
529 		    pp->sectorsize * left);
530 		g_free(buf);
531 	}
532 
533 	/* Check metadata checksum. */
534 	for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
535 	    i < (meta->config_size / sizeof(uint32_t)); i++) {
536 		checksum += *ptr++;
537 	}
538 	checksum -= meta->checksum;
539 	if (checksum != meta->checksum) {
540 		G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
541 		free(meta, M_MD_INTEL);
542 		return (NULL);
543 	}
544 
545 	/* Validate metadata size. */
546 	size = sizeof(struct intel_raid_conf) +
547 	    sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
548 	    sizeof(struct intel_raid_vol) * meta->total_volumes;
549 	if (size > meta->config_size) {
550 badsize:
551 		G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
552 		    meta->config_size, size);
553 		free(meta, M_MD_INTEL);
554 		return (NULL);
555 	}
556 	for (i = 0; i < meta->total_volumes; i++) {
557 		mvol = intel_get_volume(meta, i);
558 		mmap = intel_get_map(mvol, 0);
559 		size += 4 * (mmap->total_disks - 1);
560 		if (size > meta->config_size)
561 			goto badsize;
562 		if (mvol->migr_state) {
563 			size += sizeof(struct intel_raid_map);
564 			if (size > meta->config_size)
565 				goto badsize;
566 			mmap = intel_get_map(mvol, 1);
567 			size += 4 * (mmap->total_disks - 1);
568 			if (size > meta->config_size)
569 				goto badsize;
570 		}
571 	}
572 
573 	g_raid_md_intel_print(meta);
574 
575 	if (strncmp(meta->version, INTEL_VERSION_1300, 6) > 0) {
576 		G_RAID_DEBUG(1, "Intel unsupported version: '%.6s'",
577 		    meta->version);
578 		free(meta, M_MD_INTEL);
579 		return (NULL);
580 	}
581 
582 	if (strncmp(meta->version, INTEL_VERSION_1300, 6) >= 0 &&
583 	    (meta->attributes & ~INTEL_ATTR_SUPPORTED) != 0) {
584 		G_RAID_DEBUG(1, "Intel unsupported attributes: 0x%08x",
585 		    meta->attributes & ~INTEL_ATTR_SUPPORTED);
586 		free(meta, M_MD_INTEL);
587 		return (NULL);
588 	}
589 
590 	/* Validate disk indexes. */
591 	for (i = 0; i < meta->total_volumes; i++) {
592 		mvol = intel_get_volume(meta, i);
593 		for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
594 			mmap = intel_get_map(mvol, j);
595 			for (k = 0; k < mmap->total_disks; k++) {
596 				if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
597 				    meta->total_disks) {
598 					G_RAID_DEBUG(1, "Intel metadata disk"
599 					    " index %d too big (>%d)",
600 					    mmap->disk_idx[k] & INTEL_DI_IDX,
601 					    meta->total_disks);
602 					free(meta, M_MD_INTEL);
603 					return (NULL);
604 				}
605 			}
606 		}
607 	}
608 
609 	/* Validate migration types. */
610 	for (i = 0; i < meta->total_volumes; i++) {
611 		mvol = intel_get_volume(meta, i);
612 		/* Deny unknown migration types. */
613 		if (mvol->migr_state &&
614 		    mvol->migr_type != INTEL_MT_INIT &&
615 		    mvol->migr_type != INTEL_MT_REBUILD &&
616 		    mvol->migr_type != INTEL_MT_VERIFY &&
617 		    mvol->migr_type != INTEL_MT_GEN_MIGR &&
618 		    mvol->migr_type != INTEL_MT_REPAIR) {
619 			G_RAID_DEBUG(1, "Intel metadata has unsupported"
620 			    " migration type %d", mvol->migr_type);
621 			free(meta, M_MD_INTEL);
622 			return (NULL);
623 		}
624 		/* Deny general migrations except SINGLE->RAID1. */
625 		if (mvol->migr_state &&
626 		    mvol->migr_type == INTEL_MT_GEN_MIGR) {
627 			mmap = intel_get_map(mvol, 0);
628 			mmap1 = intel_get_map(mvol, 1);
629 			if (mmap1->total_disks != 1 ||
630 			    mmap->type != INTEL_T_RAID1 ||
631 			    mmap->total_disks != 2 ||
632 			    mmap->offset != mmap1->offset ||
633 			    mmap->disk_sectors != mmap1->disk_sectors ||
634 			    mmap->total_domains != mmap->total_disks ||
635 			    mmap->offset_hi != mmap1->offset_hi ||
636 			    mmap->disk_sectors_hi != mmap1->disk_sectors_hi ||
637 			    (mmap->disk_idx[0] != mmap1->disk_idx[0] &&
638 			     mmap->disk_idx[0] != mmap1->disk_idx[1])) {
639 				G_RAID_DEBUG(1, "Intel metadata has unsupported"
640 				    " variant of general migration");
641 				free(meta, M_MD_INTEL);
642 				return (NULL);
643 			}
644 		}
645 	}
646 
647 	return (meta);
648 }
649 
650 static int
651 intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
652 {
653 	struct g_provider *pp;
654 	char *buf;
655 	int error, i, sectors;
656 	uint32_t checksum, *ptr;
657 
658 	pp = cp->provider;
659 
660 	/* Recalculate checksum for case if metadata were changed. */
661 	meta->checksum = 0;
662 	for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
663 	    i < (meta->config_size / sizeof(uint32_t)); i++) {
664 		checksum += *ptr++;
665 	}
666 	meta->checksum = checksum;
667 
668 	/* Create and fill buffer. */
669 	sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
670 	buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
671 	if (sectors > 1) {
672 		memcpy(buf, ((char *)meta) + pp->sectorsize,
673 		    (sectors - 1) * pp->sectorsize);
674 	}
675 	memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
676 
677 	error = g_write_data(cp,
678 	    pp->mediasize - pp->sectorsize * (1 + sectors),
679 	    buf, pp->sectorsize * sectors);
680 	if (error != 0) {
681 		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
682 		    pp->name, error);
683 	}
684 
685 	free(buf, M_MD_INTEL);
686 	return (error);
687 }
688 
689 static int
690 intel_meta_erase(struct g_consumer *cp)
691 {
692 	struct g_provider *pp;
693 	char *buf;
694 	int error;
695 
696 	pp = cp->provider;
697 	buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
698 	error = g_write_data(cp,
699 	    pp->mediasize - 2 * pp->sectorsize,
700 	    buf, pp->sectorsize);
701 	if (error != 0) {
702 		G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
703 		    pp->name, error);
704 	}
705 	free(buf, M_MD_INTEL);
706 	return (error);
707 }
708 
709 static int
710 intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
711 {
712 	struct intel_raid_conf *meta;
713 	int error;
714 
715 	/* Fill anchor and single disk. */
716 	meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
717 	memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
718 	memcpy(&meta->version[0], INTEL_VERSION_1000,
719 	    sizeof(INTEL_VERSION_1000) - 1);
720 	meta->config_size = INTEL_MAX_MD_SIZE(1);
721 	meta->config_id = meta->orig_config_id = arc4random();
722 	meta->generation = 1;
723 	meta->total_disks = 1;
724 	meta->disk[0] = *d;
725 	error = intel_meta_write(cp, meta);
726 	free(meta, M_MD_INTEL);
727 	return (error);
728 }
729 
730 static struct g_raid_disk *
731 g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
732 {
733 	struct g_raid_disk	*disk;
734 	struct g_raid_md_intel_perdisk *pd;
735 
736 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
737 		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
738 		if (pd->pd_disk_pos == id)
739 			break;
740 	}
741 	return (disk);
742 }
743 
744 static int
745 g_raid_md_intel_supported(int level, int qual, int disks, int force)
746 {
747 
748 	switch (level) {
749 	case G_RAID_VOLUME_RL_RAID0:
750 		if (disks < 1)
751 			return (0);
752 		if (!force && (disks < 2 || disks > 6))
753 			return (0);
754 		break;
755 	case G_RAID_VOLUME_RL_RAID1:
756 		if (disks < 1)
757 			return (0);
758 		if (!force && (disks != 2))
759 			return (0);
760 		break;
761 	case G_RAID_VOLUME_RL_RAID1E:
762 		if (disks < 2)
763 			return (0);
764 		if (!force && (disks != 4))
765 			return (0);
766 		break;
767 	case G_RAID_VOLUME_RL_RAID5:
768 		if (disks < 3)
769 			return (0);
770 		if (!force && disks > 6)
771 			return (0);
772 		if (qual != G_RAID_VOLUME_RLQ_R5LA)
773 			return (0);
774 		break;
775 	default:
776 		return (0);
777 	}
778 	if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
779 		return (0);
780 	return (1);
781 }
782 
783 static struct g_raid_volume *
784 g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
785 {
786 	struct g_raid_volume	*mvol;
787 	struct g_raid_md_intel_pervolume *pv;
788 
789 	TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
790 		pv = mvol->v_md_data;
791 		if (pv->pv_volume_pos == id)
792 			break;
793 	}
794 	return (mvol);
795 }
796 
797 static int
798 g_raid_md_intel_start_disk(struct g_raid_disk *disk)
799 {
800 	struct g_raid_softc *sc;
801 	struct g_raid_subdisk *sd, *tmpsd;
802 	struct g_raid_disk *olddisk, *tmpdisk;
803 	struct g_raid_md_object *md;
804 	struct g_raid_md_intel_object *mdi;
805 	struct g_raid_md_intel_pervolume *pv;
806 	struct g_raid_md_intel_perdisk *pd, *oldpd;
807 	struct intel_raid_conf *meta;
808 	struct intel_raid_vol *mvol;
809 	struct intel_raid_map *mmap0, *mmap1;
810 	int disk_pos, resurrection = 0, migr_global, i;
811 
812 	sc = disk->d_softc;
813 	md = sc->sc_md;
814 	mdi = (struct g_raid_md_intel_object *)md;
815 	meta = mdi->mdio_meta;
816 	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
817 	olddisk = NULL;
818 
819 	/* Find disk position in metadata by it's serial. */
820 	disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
821 	if (disk_pos < 0) {
822 		G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
823 		/* Failed stale disk is useless for us. */
824 		if ((pd->pd_disk_meta.flags & INTEL_F_FAILED) &&
825 		    !(pd->pd_disk_meta.flags & INTEL_F_DISABLED)) {
826 			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
827 			return (0);
828 		}
829 		/* If we are in the start process, that's all for now. */
830 		if (!mdi->mdio_started)
831 			goto nofit;
832 		/*
833 		 * If we have already started - try to get use of the disk.
834 		 * Try to replace OFFLINE disks first, then FAILED.
835 		 */
836 		TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
837 			if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
838 			    tmpdisk->d_state != G_RAID_DISK_S_FAILED)
839 				continue;
840 			/* Make sure this disk is big enough. */
841 			TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
842 				off_t disk_sectors =
843 				    intel_get_disk_sectors(&pd->pd_disk_meta);
844 
845 				if (sd->sd_offset + sd->sd_size + 4096 >
846 				    disk_sectors * 512) {
847 					G_RAID_DEBUG1(1, sc,
848 					    "Disk too small (%llu < %llu)",
849 					    (unsigned long long)
850 					    disk_sectors * 512,
851 					    (unsigned long long)
852 					    sd->sd_offset + sd->sd_size + 4096);
853 					break;
854 				}
855 			}
856 			if (sd != NULL)
857 				continue;
858 			if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
859 				olddisk = tmpdisk;
860 				break;
861 			} else if (olddisk == NULL)
862 				olddisk = tmpdisk;
863 		}
864 		if (olddisk == NULL) {
865 nofit:
866 			if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
867 				g_raid_change_disk_state(disk,
868 				    G_RAID_DISK_S_SPARE);
869 				return (1);
870 			} else {
871 				g_raid_change_disk_state(disk,
872 				    G_RAID_DISK_S_STALE);
873 				return (0);
874 			}
875 		}
876 		oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
877 		disk_pos = oldpd->pd_disk_pos;
878 		resurrection = 1;
879 	}
880 
881 	if (olddisk == NULL) {
882 		/* Find placeholder by position. */
883 		olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
884 		if (olddisk == NULL)
885 			panic("No disk at position %d!", disk_pos);
886 		if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
887 			G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
888 			    disk_pos);
889 			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
890 			return (0);
891 		}
892 		oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
893 	}
894 
895 	/* Replace failed disk or placeholder with new disk. */
896 	TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
897 		TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
898 		TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
899 		sd->sd_disk = disk;
900 	}
901 	oldpd->pd_disk_pos = -2;
902 	pd->pd_disk_pos = disk_pos;
903 
904 	/* If it was placeholder -- destroy it. */
905 	if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
906 		g_raid_destroy_disk(olddisk);
907 	} else {
908 		/* Otherwise, make it STALE_FAILED. */
909 		g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
910 		/* Update global metadata just in case. */
911 		memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
912 		    sizeof(struct intel_raid_disk));
913 	}
914 
915 	/* Welcome the new disk. */
916 	if ((meta->disk[disk_pos].flags & INTEL_F_DISABLED) &&
917 	    !(pd->pd_disk_meta.flags & INTEL_F_SPARE))
918 		g_raid_change_disk_state(disk, G_RAID_DISK_S_DISABLED);
919 	else if (resurrection)
920 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
921 	else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
922 		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
923 	else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
924 		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
925 	else
926 		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
927 	TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
928 		pv = sd->sd_volume->v_md_data;
929 		mvol = intel_get_volume(meta, pv->pv_volume_pos);
930 		mmap0 = intel_get_map(mvol, 0);
931 		if (mvol->migr_state)
932 			mmap1 = intel_get_map(mvol, 1);
933 		else
934 			mmap1 = mmap0;
935 
936 		migr_global = 1;
937 		for (i = 0; i < mmap0->total_disks; i++) {
938 			if ((mmap0->disk_idx[i] & INTEL_DI_RBLD) == 0 &&
939 			    (mmap1->disk_idx[i] & INTEL_DI_RBLD) != 0)
940 				migr_global = 0;
941 		}
942 
943 		if ((meta->disk[disk_pos].flags & INTEL_F_DISABLED) &&
944 		    !(pd->pd_disk_meta.flags & INTEL_F_SPARE)) {
945 			/* Disabled disk, useless. */
946 			g_raid_change_subdisk_state(sd,
947 			    G_RAID_SUBDISK_S_NONE);
948 		} else if (resurrection) {
949 			/* Stale disk, almost same as new. */
950 			g_raid_change_subdisk_state(sd,
951 			    G_RAID_SUBDISK_S_NEW);
952 		} else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
953 			/* Failed disk, almost useless. */
954 			g_raid_change_subdisk_state(sd,
955 			    G_RAID_SUBDISK_S_FAILED);
956 		} else if (mvol->migr_state == 0) {
957 			if (mmap0->status == INTEL_S_UNINITIALIZED &&
958 			    (!pv->pv_cng || pv->pv_cng_master_disk != disk_pos)) {
959 				/* Freshly created uninitialized volume. */
960 				g_raid_change_subdisk_state(sd,
961 				    G_RAID_SUBDISK_S_UNINITIALIZED);
962 			} else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
963 				/* Freshly inserted disk. */
964 				g_raid_change_subdisk_state(sd,
965 				    G_RAID_SUBDISK_S_NEW);
966 			} else if (mvol->dirty && (!pv->pv_cng ||
967 			    pv->pv_cng_master_disk != disk_pos)) {
968 				/* Dirty volume (unclean shutdown). */
969 				g_raid_change_subdisk_state(sd,
970 				    G_RAID_SUBDISK_S_STALE);
971 			} else {
972 				/* Up to date disk. */
973 				g_raid_change_subdisk_state(sd,
974 				    G_RAID_SUBDISK_S_ACTIVE);
975 			}
976 		} else if (mvol->migr_type == INTEL_MT_INIT ||
977 			   mvol->migr_type == INTEL_MT_REBUILD) {
978 			if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
979 				/* Freshly inserted disk. */
980 				g_raid_change_subdisk_state(sd,
981 				    G_RAID_SUBDISK_S_NEW);
982 			} else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
983 				/* Rebuilding disk. */
984 				g_raid_change_subdisk_state(sd,
985 				    G_RAID_SUBDISK_S_REBUILD);
986 				if (mvol->dirty) {
987 					sd->sd_rebuild_pos = 0;
988 				} else {
989 					sd->sd_rebuild_pos =
990 					    intel_get_vol_curr_migr_unit(mvol) *
991 					    sd->sd_volume->v_strip_size *
992 					    mmap0->total_domains;
993 				}
994 			} else if (mvol->migr_type == INTEL_MT_INIT &&
995 			    migr_global) {
996 				/* Freshly created uninitialized volume. */
997 				g_raid_change_subdisk_state(sd,
998 				    G_RAID_SUBDISK_S_UNINITIALIZED);
999 			} else if (mvol->dirty && (!pv->pv_cng ||
1000 			    pv->pv_cng_master_disk != disk_pos)) {
1001 				/* Dirty volume (unclean shutdown). */
1002 				g_raid_change_subdisk_state(sd,
1003 				    G_RAID_SUBDISK_S_STALE);
1004 			} else {
1005 				/* Up to date disk. */
1006 				g_raid_change_subdisk_state(sd,
1007 				    G_RAID_SUBDISK_S_ACTIVE);
1008 			}
1009 		} else if (mvol->migr_type == INTEL_MT_VERIFY ||
1010 			   mvol->migr_type == INTEL_MT_REPAIR) {
1011 			if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
1012 				/* Freshly inserted disk. */
1013 				g_raid_change_subdisk_state(sd,
1014 				    G_RAID_SUBDISK_S_NEW);
1015 			} else if ((mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) ||
1016 			    migr_global) {
1017 				/* Resyncing disk. */
1018 				g_raid_change_subdisk_state(sd,
1019 				    G_RAID_SUBDISK_S_RESYNC);
1020 				if (mvol->dirty) {
1021 					sd->sd_rebuild_pos = 0;
1022 				} else {
1023 					sd->sd_rebuild_pos =
1024 					    intel_get_vol_curr_migr_unit(mvol) *
1025 					    sd->sd_volume->v_strip_size *
1026 					    mmap0->total_domains;
1027 				}
1028 			} else if (mvol->dirty) {
1029 				/* Dirty volume (unclean shutdown). */
1030 				g_raid_change_subdisk_state(sd,
1031 				    G_RAID_SUBDISK_S_STALE);
1032 			} else {
1033 				/* Up to date disk. */
1034 				g_raid_change_subdisk_state(sd,
1035 				    G_RAID_SUBDISK_S_ACTIVE);
1036 			}
1037 		} else if (mvol->migr_type == INTEL_MT_GEN_MIGR) {
1038 			if ((mmap1->disk_idx[0] & INTEL_DI_IDX) != disk_pos) {
1039 				/* Freshly inserted disk. */
1040 				g_raid_change_subdisk_state(sd,
1041 				    G_RAID_SUBDISK_S_NEW);
1042 			} else {
1043 				/* Up to date disk. */
1044 				g_raid_change_subdisk_state(sd,
1045 				    G_RAID_SUBDISK_S_ACTIVE);
1046 			}
1047 		}
1048 		g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1049 		    G_RAID_EVENT_SUBDISK);
1050 	}
1051 
1052 	/* Update status of our need for spare. */
1053 	if (mdi->mdio_started) {
1054 		mdi->mdio_incomplete =
1055 		    (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) +
1056 		     g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED) <
1057 		     meta->total_disks);
1058 	}
1059 
1060 	return (resurrection);
1061 }
1062 
1063 static void
1064 g_disk_md_intel_retaste(void *arg, int pending)
1065 {
1066 
1067 	G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
1068 	g_retaste(&g_raid_class);
1069 	free(arg, M_MD_INTEL);
1070 }
1071 
1072 static void
1073 g_raid_md_intel_refill(struct g_raid_softc *sc)
1074 {
1075 	struct g_raid_md_object *md;
1076 	struct g_raid_md_intel_object *mdi;
1077 	struct intel_raid_conf *meta;
1078 	struct g_raid_disk *disk;
1079 	struct task *task;
1080 	int update, na;
1081 
1082 	md = sc->sc_md;
1083 	mdi = (struct g_raid_md_intel_object *)md;
1084 	meta = mdi->mdio_meta;
1085 	update = 0;
1086 	do {
1087 		/* Make sure we miss anything. */
1088 		na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) +
1089 		    g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED);
1090 		if (na == meta->total_disks)
1091 			break;
1092 
1093 		G_RAID_DEBUG1(1, md->mdo_softc,
1094 		    "Array is not complete (%d of %d), "
1095 		    "trying to refill.", na, meta->total_disks);
1096 
1097 		/* Try to get use some of STALE disks. */
1098 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1099 			if (disk->d_state == G_RAID_DISK_S_STALE) {
1100 				update += g_raid_md_intel_start_disk(disk);
1101 				if (disk->d_state == G_RAID_DISK_S_ACTIVE ||
1102 				    disk->d_state == G_RAID_DISK_S_DISABLED)
1103 					break;
1104 			}
1105 		}
1106 		if (disk != NULL)
1107 			continue;
1108 
1109 		/* Try to get use some of SPARE disks. */
1110 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1111 			if (disk->d_state == G_RAID_DISK_S_SPARE) {
1112 				update += g_raid_md_intel_start_disk(disk);
1113 				if (disk->d_state == G_RAID_DISK_S_ACTIVE)
1114 					break;
1115 			}
1116 		}
1117 	} while (disk != NULL);
1118 
1119 	/* Write new metadata if we changed something. */
1120 	if (update) {
1121 		g_raid_md_write_intel(md, NULL, NULL, NULL);
1122 		meta = mdi->mdio_meta;
1123 	}
1124 
1125 	/* Update status of our need for spare. */
1126 	mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) +
1127 	    g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED) < meta->total_disks);
1128 
1129 	/* Request retaste hoping to find spare. */
1130 	if (mdi->mdio_incomplete) {
1131 		task = malloc(sizeof(struct task),
1132 		    M_MD_INTEL, M_WAITOK | M_ZERO);
1133 		TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
1134 		taskqueue_enqueue(taskqueue_swi, task);
1135 	}
1136 }
1137 
1138 static void
1139 g_raid_md_intel_start(struct g_raid_softc *sc)
1140 {
1141 	struct g_raid_md_object *md;
1142 	struct g_raid_md_intel_object *mdi;
1143 	struct g_raid_md_intel_pervolume *pv;
1144 	struct g_raid_md_intel_perdisk *pd;
1145 	struct intel_raid_conf *meta;
1146 	struct intel_raid_vol *mvol;
1147 	struct intel_raid_map *mmap;
1148 	struct g_raid_volume *vol;
1149 	struct g_raid_subdisk *sd;
1150 	struct g_raid_disk *disk;
1151 	int i, j, disk_pos;
1152 
1153 	md = sc->sc_md;
1154 	mdi = (struct g_raid_md_intel_object *)md;
1155 	meta = mdi->mdio_meta;
1156 
1157 	/* Create volumes and subdisks. */
1158 	for (i = 0; i < meta->total_volumes; i++) {
1159 		mvol = intel_get_volume(meta, i);
1160 		mmap = intel_get_map(mvol, 0);
1161 		vol = g_raid_create_volume(sc, mvol->name, mvol->tid - 1);
1162 		pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO);
1163 		pv->pv_volume_pos = i;
1164 		pv->pv_cng = (mvol->state & INTEL_ST_CLONE_N_GO) != 0;
1165 		pv->pv_cng_man_sync = (mvol->state & INTEL_ST_CLONE_MAN_SYNC) != 0;
1166 		if (mvol->cng_master_disk < mmap->total_disks)
1167 			pv->pv_cng_master_disk = mvol->cng_master_disk;
1168 		vol->v_md_data = pv;
1169 		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
1170 		if (mmap->type == INTEL_T_RAID0)
1171 			vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
1172 		else if (mmap->type == INTEL_T_RAID1 &&
1173 		    mmap->total_domains >= 2 &&
1174 		    mmap->total_domains <= mmap->total_disks) {
1175 			/* Assume total_domains is correct. */
1176 			if (mmap->total_domains == mmap->total_disks)
1177 				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1178 			else
1179 				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1180 		} else if (mmap->type == INTEL_T_RAID1) {
1181 			/* total_domains looks wrong. */
1182 			if (mmap->total_disks <= 2)
1183 				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1184 			else
1185 				vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1186 		} else if (mmap->type == INTEL_T_RAID5) {
1187 			vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
1188 			vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
1189 		} else
1190 			vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
1191 		vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
1192 		vol->v_disks_count = mmap->total_disks;
1193 		vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
1194 		vol->v_sectorsize = 512; //ZZZ
1195 		for (j = 0; j < vol->v_disks_count; j++) {
1196 			sd = &vol->v_subdisks[j];
1197 			sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
1198 			sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
1199 		}
1200 		g_raid_start_volume(vol);
1201 	}
1202 
1203 	/* Create disk placeholders to store data for later writing. */
1204 	for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
1205 		pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1206 		pd->pd_disk_pos = disk_pos;
1207 		pd->pd_disk_meta = meta->disk[disk_pos];
1208 		disk = g_raid_create_disk(sc);
1209 		disk->d_md_data = (void *)pd;
1210 		disk->d_state = G_RAID_DISK_S_OFFLINE;
1211 		for (i = 0; i < meta->total_volumes; i++) {
1212 			mvol = intel_get_volume(meta, i);
1213 			mmap = intel_get_map(mvol, 0);
1214 			for (j = 0; j < mmap->total_disks; j++) {
1215 				if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
1216 					break;
1217 			}
1218 			if (j == mmap->total_disks)
1219 				continue;
1220 			vol = g_raid_md_intel_get_volume(sc, i);
1221 			sd = &vol->v_subdisks[j];
1222 			sd->sd_disk = disk;
1223 			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1224 		}
1225 	}
1226 
1227 	/* Make all disks found till the moment take their places. */
1228 	do {
1229 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1230 			if (disk->d_state == G_RAID_DISK_S_NONE) {
1231 				g_raid_md_intel_start_disk(disk);
1232 				break;
1233 			}
1234 		}
1235 	} while (disk != NULL);
1236 
1237 	mdi->mdio_started = 1;
1238 	G_RAID_DEBUG1(0, sc, "Array started.");
1239 	g_raid_md_write_intel(md, NULL, NULL, NULL);
1240 
1241 	/* Pickup any STALE/SPARE disks to refill array if needed. */
1242 	g_raid_md_intel_refill(sc);
1243 
1244 	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1245 		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1246 		    G_RAID_EVENT_VOLUME);
1247 	}
1248 
1249 	callout_stop(&mdi->mdio_start_co);
1250 	G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
1251 	root_mount_rel(mdi->mdio_rootmount);
1252 	mdi->mdio_rootmount = NULL;
1253 }
1254 
1255 static void
1256 g_raid_md_intel_new_disk(struct g_raid_disk *disk)
1257 {
1258 	struct g_raid_softc *sc;
1259 	struct g_raid_md_object *md;
1260 	struct g_raid_md_intel_object *mdi;
1261 	struct intel_raid_conf *pdmeta;
1262 	struct g_raid_md_intel_perdisk *pd;
1263 
1264 	sc = disk->d_softc;
1265 	md = sc->sc_md;
1266 	mdi = (struct g_raid_md_intel_object *)md;
1267 	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1268 	pdmeta = pd->pd_meta;
1269 
1270 	if (mdi->mdio_started) {
1271 		if (g_raid_md_intel_start_disk(disk))
1272 			g_raid_md_write_intel(md, NULL, NULL, NULL);
1273 	} else {
1274 		/* If we haven't started yet - check metadata freshness. */
1275 		if (mdi->mdio_meta == NULL ||
1276 		    ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
1277 			G_RAID_DEBUG1(1, sc, "Newer disk");
1278 			if (mdi->mdio_meta != NULL)
1279 				free(mdi->mdio_meta, M_MD_INTEL);
1280 			mdi->mdio_meta = intel_meta_copy(pdmeta);
1281 			mdi->mdio_generation = mdi->mdio_meta->generation;
1282 			mdi->mdio_disks_present = 1;
1283 		} else if (pdmeta->generation == mdi->mdio_generation) {
1284 			mdi->mdio_disks_present++;
1285 			G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
1286 			    mdi->mdio_disks_present,
1287 			    mdi->mdio_meta->total_disks);
1288 		} else {
1289 			G_RAID_DEBUG1(1, sc, "Older disk");
1290 		}
1291 		/* If we collected all needed disks - start array. */
1292 		if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
1293 			g_raid_md_intel_start(sc);
1294 	}
1295 }
1296 
1297 static void
1298 g_raid_intel_go(void *arg)
1299 {
1300 	struct g_raid_softc *sc;
1301 	struct g_raid_md_object *md;
1302 	struct g_raid_md_intel_object *mdi;
1303 
1304 	sc = arg;
1305 	md = sc->sc_md;
1306 	mdi = (struct g_raid_md_intel_object *)md;
1307 	if (!mdi->mdio_started) {
1308 		G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
1309 		g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
1310 	}
1311 }
1312 
1313 static int
1314 g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
1315     struct g_geom **gp)
1316 {
1317 	struct g_raid_softc *sc;
1318 	struct g_raid_md_intel_object *mdi;
1319 	char name[16];
1320 
1321 	mdi = (struct g_raid_md_intel_object *)md;
1322 	mdi->mdio_config_id = mdi->mdio_orig_config_id = arc4random();
1323 	mdi->mdio_generation = 0;
1324 	snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
1325 	sc = g_raid_create_node(mp, name, md);
1326 	if (sc == NULL)
1327 		return (G_RAID_MD_TASTE_FAIL);
1328 	md->mdo_softc = sc;
1329 	*gp = sc->sc_geom;
1330 	return (G_RAID_MD_TASTE_NEW);
1331 }
1332 
/*
 * Fetch the provider's "GEOM::ident" string and copy its last serlen
 * characters into serial.
 *
 * The Linux and ataraid(7) code always stores the last 16 characters of the
 * drive label in the Intel metadata; this helper generalizes that to N
 * characters.  Labels can be up to 20 characters for SATA drives and up to
 * 251 characters for SAS drives.  Since Intel controllers don't support SAS
 * drives, the local buffer sticks with the SATA limit to stay stack
 * friendly.
 *
 * The copy is done with strncpy(): an ident shorter than serlen is
 * NUL-padded, while one of serlen characters or more fills the destination
 * completely and leaves it WITHOUT a terminating NUL.
 *
 * Returns 0 on success, or the error reported by g_io_getattr().
 */
static int
g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
{
	char ident[24];
	int identlen, skip, rc;

	identlen = sizeof(ident);
	rc = g_io_getattr("GEOM::ident", cp, &identlen, ident);
	if (rc != 0)
		return (rc);

	/* Skip the leading part so that only the tail is kept. */
	identlen = strlen(ident);
	skip = (identlen > serlen) ? identlen - serlen : 0;
	strncpy(serial, ident + skip, serlen);
	return (0);
}
1359 
1360 static int
1361 g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
1362                               struct g_consumer *cp, struct g_geom **gp)
1363 {
1364 	struct g_consumer *rcp;
1365 	struct g_provider *pp;
1366 	struct g_raid_md_intel_object *mdi, *mdi1;
1367 	struct g_raid_softc *sc;
1368 	struct g_raid_disk *disk;
1369 	struct intel_raid_conf *meta;
1370 	struct g_raid_md_intel_perdisk *pd;
1371 	struct g_geom *geom;
1372 	int error, disk_pos, result, spare, len;
1373 	char serial[INTEL_SERIAL_LEN];
1374 	char name[16];
1375 	uint16_t vendor;
1376 
1377 	G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
1378 	mdi = (struct g_raid_md_intel_object *)md;
1379 	pp = cp->provider;
1380 
1381 	/* Read metadata from device. */
1382 	meta = NULL;
1383 	vendor = 0xffff;
1384 	disk_pos = 0;
1385 	if (g_access(cp, 1, 0, 0) != 0)
1386 		return (G_RAID_MD_TASTE_FAIL);
1387 	g_topology_unlock();
1388 	error = g_raid_md_get_label(cp, serial, sizeof(serial));
1389 	if (error != 0) {
1390 		G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
1391 		    pp->name, error);
1392 		goto fail2;
1393 	}
1394 	len = 2;
1395 	if (pp->geom->rank == 1)
1396 		g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
1397 	meta = intel_meta_read(cp);
1398 	g_topology_lock();
1399 	g_access(cp, -1, 0, 0);
1400 	if (meta == NULL) {
1401 		if (g_raid_aggressive_spare) {
1402 			if (vendor != 0x8086) {
1403 				G_RAID_DEBUG(1,
1404 				    "Intel vendor mismatch 0x%04x != 0x8086",
1405 				    vendor);
1406 			} else {
1407 				G_RAID_DEBUG(1,
1408 				    "No Intel metadata, forcing spare.");
1409 				spare = 2;
1410 				goto search;
1411 			}
1412 		}
1413 		return (G_RAID_MD_TASTE_FAIL);
1414 	}
1415 
1416 	/* Check this disk position in obtained metadata. */
1417 	disk_pos = intel_meta_find_disk(meta, serial);
1418 	if (disk_pos < 0) {
1419 		G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
1420 		goto fail1;
1421 	}
1422 	if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
1423 	    (pp->mediasize / pp->sectorsize)) {
1424 		G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
1425 		    intel_get_disk_sectors(&meta->disk[disk_pos]),
1426 		    (off_t)(pp->mediasize / pp->sectorsize));
1427 		goto fail1;
1428 	}
1429 
1430 	G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
1431 	spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
1432 
1433 search:
1434 	/* Search for matching node. */
1435 	sc = NULL;
1436 	mdi1 = NULL;
1437 	LIST_FOREACH(geom, &mp->geom, geom) {
1438 		sc = geom->softc;
1439 		if (sc == NULL)
1440 			continue;
1441 		if (sc->sc_stopping != 0)
1442 			continue;
1443 		if (sc->sc_md->mdo_class != md->mdo_class)
1444 			continue;
1445 		mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
1446 		if (spare) {
1447 			if (mdi1->mdio_incomplete)
1448 				break;
1449 		} else {
1450 			if (mdi1->mdio_config_id == meta->config_id)
1451 				break;
1452 		}
1453 	}
1454 
1455 	/* Found matching node. */
1456 	if (geom != NULL) {
1457 		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
1458 		result = G_RAID_MD_TASTE_EXISTING;
1459 
1460 	} else if (spare) { /* Not found needy node -- left for later. */
1461 		G_RAID_DEBUG(1, "Spare is not needed at this time");
1462 		goto fail1;
1463 
1464 	} else { /* Not found matching node -- create one. */
1465 		result = G_RAID_MD_TASTE_NEW;
1466 		mdi->mdio_config_id = meta->config_id;
1467 		mdi->mdio_orig_config_id = meta->orig_config_id;
1468 		snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
1469 		sc = g_raid_create_node(mp, name, md);
1470 		md->mdo_softc = sc;
1471 		geom = sc->sc_geom;
1472 		callout_init(&mdi->mdio_start_co, 1);
1473 		callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
1474 		    g_raid_intel_go, sc);
1475 		mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
1476 		G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
1477 	}
1478 
1479 	rcp = g_new_consumer(geom);
1480 	rcp->flags |= G_CF_DIRECT_RECEIVE;
1481 	g_attach(rcp, pp);
1482 	if (g_access(rcp, 1, 1, 1) != 0)
1483 		; //goto fail1;
1484 
1485 	g_topology_unlock();
1486 	sx_xlock(&sc->sc_lock);
1487 
1488 	pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1489 	pd->pd_meta = meta;
1490 	pd->pd_disk_pos = -1;
1491 	if (spare == 2) {
1492 		memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
1493 		intel_set_disk_sectors(&pd->pd_disk_meta,
1494 		    pp->mediasize / pp->sectorsize);
1495 		pd->pd_disk_meta.id = 0;
1496 		pd->pd_disk_meta.flags = INTEL_F_SPARE;
1497 	} else {
1498 		pd->pd_disk_meta = meta->disk[disk_pos];
1499 	}
1500 	disk = g_raid_create_disk(sc);
1501 	disk->d_md_data = (void *)pd;
1502 	disk->d_consumer = rcp;
1503 	rcp->private = disk;
1504 
1505 	g_raid_get_disk_info(disk);
1506 
1507 	g_raid_md_intel_new_disk(disk);
1508 
1509 	sx_xunlock(&sc->sc_lock);
1510 	g_topology_lock();
1511 	*gp = geom;
1512 	return (result);
1513 fail2:
1514 	g_topology_lock();
1515 	g_access(cp, -1, 0, 0);
1516 fail1:
1517 	free(meta, M_MD_INTEL);
1518 	return (G_RAID_MD_TASTE_FAIL);
1519 }
1520 
1521 static int
1522 g_raid_md_event_intel(struct g_raid_md_object *md,
1523     struct g_raid_disk *disk, u_int event)
1524 {
1525 	struct g_raid_softc *sc;
1526 	struct g_raid_subdisk *sd;
1527 	struct g_raid_md_intel_object *mdi;
1528 	struct g_raid_md_intel_perdisk *pd;
1529 
1530 	sc = md->mdo_softc;
1531 	mdi = (struct g_raid_md_intel_object *)md;
1532 	if (disk == NULL) {
1533 		switch (event) {
1534 		case G_RAID_NODE_E_START:
1535 			if (!mdi->mdio_started)
1536 				g_raid_md_intel_start(sc);
1537 			return (0);
1538 		}
1539 		return (-1);
1540 	}
1541 	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1542 	switch (event) {
1543 	case G_RAID_DISK_E_DISCONNECTED:
1544 		/* If disk was assigned, just update statuses. */
1545 		if (pd->pd_disk_pos >= 0) {
1546 			g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1547 			if (disk->d_consumer) {
1548 				g_raid_kill_consumer(sc, disk->d_consumer);
1549 				disk->d_consumer = NULL;
1550 			}
1551 			TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1552 				g_raid_change_subdisk_state(sd,
1553 				    G_RAID_SUBDISK_S_NONE);
1554 				g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1555 				    G_RAID_EVENT_SUBDISK);
1556 			}
1557 		} else {
1558 			/* Otherwise -- delete. */
1559 			g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
1560 			g_raid_destroy_disk(disk);
1561 		}
1562 
1563 		/* Write updated metadata to all disks. */
1564 		g_raid_md_write_intel(md, NULL, NULL, NULL);
1565 
1566 		/* Check if anything left except placeholders. */
1567 		if (g_raid_ndisks(sc, -1) ==
1568 		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
1569 			g_raid_destroy_node(sc, 0);
1570 		else
1571 			g_raid_md_intel_refill(sc);
1572 		return (0);
1573 	}
1574 	return (-2);
1575 }
1576 
1577 static int
1578 g_raid_md_ctl_intel(struct g_raid_md_object *md,
1579     struct gctl_req *req)
1580 {
1581 	struct g_raid_softc *sc;
1582 	struct g_raid_volume *vol, *vol1;
1583 	struct g_raid_subdisk *sd;
1584 	struct g_raid_disk *disk;
1585 	struct g_raid_md_intel_object *mdi;
1586 	struct g_raid_md_intel_pervolume *pv;
1587 	struct g_raid_md_intel_perdisk *pd;
1588 	struct g_consumer *cp;
1589 	struct g_provider *pp;
1590 	char arg[16], serial[INTEL_SERIAL_LEN];
1591 	const char *nodename, *verb, *volname, *levelname, *diskname;
1592 	char *tmp;
1593 	int *nargs, *force;
1594 	off_t off, size, sectorsize, strip, disk_sectors;
1595 	intmax_t *sizearg, *striparg;
1596 	int numdisks, i, len, level, qual, update;
1597 	int error;
1598 
1599 	sc = md->mdo_softc;
1600 	mdi = (struct g_raid_md_intel_object *)md;
1601 	verb = gctl_get_param(req, "verb", NULL);
1602 	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1603 	error = 0;
1604 	if (strcmp(verb, "label") == 0) {
1605 
1606 		if (*nargs < 4) {
1607 			gctl_error(req, "Invalid number of arguments.");
1608 			return (-1);
1609 		}
1610 		volname = gctl_get_asciiparam(req, "arg1");
1611 		if (volname == NULL) {
1612 			gctl_error(req, "No volume name.");
1613 			return (-2);
1614 		}
1615 		levelname = gctl_get_asciiparam(req, "arg2");
1616 		if (levelname == NULL) {
1617 			gctl_error(req, "No RAID level.");
1618 			return (-3);
1619 		}
1620 		if (strcasecmp(levelname, "RAID5") == 0)
1621 			levelname = "RAID5-LA";
1622 		if (g_raid_volume_str2level(levelname, &level, &qual)) {
1623 			gctl_error(req, "Unknown RAID level '%s'.", levelname);
1624 			return (-4);
1625 		}
1626 		numdisks = *nargs - 3;
1627 		force = gctl_get_paraml(req, "force", sizeof(*force));
1628 		if (!g_raid_md_intel_supported(level, qual, numdisks,
1629 		    force ? *force : 0)) {
1630 			gctl_error(req, "Unsupported RAID level "
1631 			    "(0x%02x/0x%02x), or number of disks (%d).",
1632 			    level, qual, numdisks);
1633 			return (-5);
1634 		}
1635 
1636 		/* Search for disks, connect them and probe. */
1637 		size = 0x7fffffffffffffffllu;
1638 		sectorsize = 0;
1639 		for (i = 0; i < numdisks; i++) {
1640 			snprintf(arg, sizeof(arg), "arg%d", i + 3);
1641 			diskname = gctl_get_asciiparam(req, arg);
1642 			if (diskname == NULL) {
1643 				gctl_error(req, "No disk name (%s).", arg);
1644 				error = -6;
1645 				break;
1646 			}
1647 			if (strcmp(diskname, "NONE") == 0) {
1648 				cp = NULL;
1649 				pp = NULL;
1650 			} else {
1651 				g_topology_lock();
1652 				cp = g_raid_open_consumer(sc, diskname);
1653 				if (cp == NULL) {
1654 					gctl_error(req, "Can't open disk '%s'.",
1655 					    diskname);
1656 					g_topology_unlock();
1657 					error = -7;
1658 					break;
1659 				}
1660 				pp = cp->provider;
1661 			}
1662 			pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1663 			pd->pd_disk_pos = i;
1664 			disk = g_raid_create_disk(sc);
1665 			disk->d_md_data = (void *)pd;
1666 			disk->d_consumer = cp;
1667 			if (cp == NULL) {
1668 				strcpy(&pd->pd_disk_meta.serial[0], "NONE");
1669 				pd->pd_disk_meta.id = 0xffffffff;
1670 				pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
1671 				continue;
1672 			}
1673 			cp->private = disk;
1674 			g_topology_unlock();
1675 
1676 			error = g_raid_md_get_label(cp,
1677 			    &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
1678 			if (error != 0) {
1679 				gctl_error(req,
1680 				    "Can't get serial for provider '%s'.",
1681 				    diskname);
1682 				error = -8;
1683 				break;
1684 			}
1685 
1686 			g_raid_get_disk_info(disk);
1687 
1688 			intel_set_disk_sectors(&pd->pd_disk_meta,
1689 			    pp->mediasize / pp->sectorsize);
1690 			if (size > pp->mediasize)
1691 				size = pp->mediasize;
1692 			if (sectorsize < pp->sectorsize)
1693 				sectorsize = pp->sectorsize;
1694 			pd->pd_disk_meta.id = 0;
1695 			pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
1696 		}
1697 		if (error != 0)
1698 			return (error);
1699 
1700 		if (sectorsize <= 0) {
1701 			gctl_error(req, "Can't get sector size.");
1702 			return (-8);
1703 		}
1704 
1705 		/* Reserve some space for metadata. */
1706 		size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1707 
1708 		/* Handle size argument. */
1709 		len = sizeof(*sizearg);
1710 		sizearg = gctl_get_param(req, "size", &len);
1711 		if (sizearg != NULL && len == sizeof(*sizearg) &&
1712 		    *sizearg > 0) {
1713 			if (*sizearg > size) {
1714 				gctl_error(req, "Size too big %lld > %lld.",
1715 				    (long long)*sizearg, (long long)size);
1716 				return (-9);
1717 			}
1718 			size = *sizearg;
1719 		}
1720 
1721 		/* Handle strip argument. */
1722 		strip = 131072;
1723 		len = sizeof(*striparg);
1724 		striparg = gctl_get_param(req, "strip", &len);
1725 		if (striparg != NULL && len == sizeof(*striparg) &&
1726 		    *striparg > 0) {
1727 			if (*striparg < sectorsize) {
1728 				gctl_error(req, "Strip size too small.");
1729 				return (-10);
1730 			}
1731 			if (*striparg % sectorsize != 0) {
1732 				gctl_error(req, "Incorrect strip size.");
1733 				return (-11);
1734 			}
1735 			if (strip > 65535 * sectorsize) {
1736 				gctl_error(req, "Strip size too big.");
1737 				return (-12);
1738 			}
1739 			strip = *striparg;
1740 		}
1741 
1742 		/* Round size down to strip or sector. */
1743 		if (level == G_RAID_VOLUME_RL_RAID1)
1744 			size -= (size % sectorsize);
1745 		else if (level == G_RAID_VOLUME_RL_RAID1E &&
1746 		    (numdisks & 1) != 0)
1747 			size -= (size % (2 * strip));
1748 		else
1749 			size -= (size % strip);
1750 		if (size <= 0) {
1751 			gctl_error(req, "Size too small.");
1752 			return (-13);
1753 		}
1754 
1755 		/* We have all we need, create things: volume, ... */
1756 		mdi->mdio_started = 1;
1757 		vol = g_raid_create_volume(sc, volname, -1);
1758 		pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO);
1759 		pv->pv_volume_pos = 0;
1760 		vol->v_md_data = pv;
1761 		vol->v_raid_level = level;
1762 		vol->v_raid_level_qualifier = qual;
1763 		vol->v_strip_size = strip;
1764 		vol->v_disks_count = numdisks;
1765 		if (level == G_RAID_VOLUME_RL_RAID0)
1766 			vol->v_mediasize = size * numdisks;
1767 		else if (level == G_RAID_VOLUME_RL_RAID1)
1768 			vol->v_mediasize = size;
1769 		else if (level == G_RAID_VOLUME_RL_RAID5)
1770 			vol->v_mediasize = size * (numdisks - 1);
1771 		else { /* RAID1E */
1772 			vol->v_mediasize = ((size * numdisks) / strip / 2) *
1773 			    strip;
1774 		}
1775 		vol->v_sectorsize = sectorsize;
1776 		g_raid_start_volume(vol);
1777 
1778 		/* , and subdisks. */
1779 		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1780 			pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1781 			sd = &vol->v_subdisks[pd->pd_disk_pos];
1782 			sd->sd_disk = disk;
1783 			sd->sd_offset = 0;
1784 			sd->sd_size = size;
1785 			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1786 			if (sd->sd_disk->d_consumer != NULL) {
1787 				g_raid_change_disk_state(disk,
1788 				    G_RAID_DISK_S_ACTIVE);
1789 				if (level == G_RAID_VOLUME_RL_RAID5)
1790 					g_raid_change_subdisk_state(sd,
1791 					    G_RAID_SUBDISK_S_UNINITIALIZED);
1792 				else
1793 					g_raid_change_subdisk_state(sd,
1794 					    G_RAID_SUBDISK_S_ACTIVE);
1795 				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1796 				    G_RAID_EVENT_SUBDISK);
1797 			} else {
1798 				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1799 			}
1800 		}
1801 
1802 		/* Write metadata based on created entities. */
1803 		G_RAID_DEBUG1(0, sc, "Array started.");
1804 		g_raid_md_write_intel(md, NULL, NULL, NULL);
1805 
1806 		/* Pickup any STALE/SPARE disks to refill array if needed. */
1807 		g_raid_md_intel_refill(sc);
1808 
1809 		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1810 		    G_RAID_EVENT_VOLUME);
1811 		return (0);
1812 	}
1813 	if (strcmp(verb, "add") == 0) {
1814 
1815 		if (*nargs != 3) {
1816 			gctl_error(req, "Invalid number of arguments.");
1817 			return (-1);
1818 		}
1819 		volname = gctl_get_asciiparam(req, "arg1");
1820 		if (volname == NULL) {
1821 			gctl_error(req, "No volume name.");
1822 			return (-2);
1823 		}
1824 		levelname = gctl_get_asciiparam(req, "arg2");
1825 		if (levelname == NULL) {
1826 			gctl_error(req, "No RAID level.");
1827 			return (-3);
1828 		}
1829 		if (strcasecmp(levelname, "RAID5") == 0)
1830 			levelname = "RAID5-LA";
1831 		if (g_raid_volume_str2level(levelname, &level, &qual)) {
1832 			gctl_error(req, "Unknown RAID level '%s'.", levelname);
1833 			return (-4);
1834 		}
1835 
1836 		/* Look for existing volumes. */
1837 		i = 0;
1838 		vol1 = NULL;
1839 		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1840 			vol1 = vol;
1841 			i++;
1842 		}
1843 		if (i > 1) {
1844 			gctl_error(req, "Maximum two volumes supported.");
1845 			return (-6);
1846 		}
1847 		if (vol1 == NULL) {
1848 			gctl_error(req, "At least one volume must exist.");
1849 			return (-7);
1850 		}
1851 
1852 		numdisks = vol1->v_disks_count;
1853 		force = gctl_get_paraml(req, "force", sizeof(*force));
1854 		if (!g_raid_md_intel_supported(level, qual, numdisks,
1855 		    force ? *force : 0)) {
1856 			gctl_error(req, "Unsupported RAID level "
1857 			    "(0x%02x/0x%02x), or number of disks (%d).",
1858 			    level, qual, numdisks);
1859 			return (-5);
1860 		}
1861 
1862 		/* Collect info about present disks. */
1863 		size = 0x7fffffffffffffffllu;
1864 		sectorsize = 512;
1865 		for (i = 0; i < numdisks; i++) {
1866 			disk = vol1->v_subdisks[i].sd_disk;
1867 			pd = (struct g_raid_md_intel_perdisk *)
1868 			    disk->d_md_data;
1869 			disk_sectors =
1870 			    intel_get_disk_sectors(&pd->pd_disk_meta);
1871 
1872 			if (disk_sectors * 512 < size)
1873 				size = disk_sectors * 512;
1874 			if (disk->d_consumer != NULL &&
1875 			    disk->d_consumer->provider != NULL &&
1876 			    disk->d_consumer->provider->sectorsize >
1877 			     sectorsize) {
1878 				sectorsize =
1879 				    disk->d_consumer->provider->sectorsize;
1880 			}
1881 		}
1882 
1883 		/* Reserve some space for metadata. */
1884 		size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1885 
1886 		/* Decide insert before or after. */
1887 		sd = &vol1->v_subdisks[0];
1888 		if (sd->sd_offset >
1889 		    size - (sd->sd_offset + sd->sd_size)) {
1890 			off = 0;
1891 			size = sd->sd_offset;
1892 		} else {
1893 			off = sd->sd_offset + sd->sd_size;
1894 			size = size - (sd->sd_offset + sd->sd_size);
1895 		}
1896 
1897 		/* Handle strip argument. */
1898 		strip = 131072;
1899 		len = sizeof(*striparg);
1900 		striparg = gctl_get_param(req, "strip", &len);
1901 		if (striparg != NULL && len == sizeof(*striparg) &&
1902 		    *striparg > 0) {
1903 			if (*striparg < sectorsize) {
1904 				gctl_error(req, "Strip size too small.");
1905 				return (-10);
1906 			}
1907 			if (*striparg % sectorsize != 0) {
1908 				gctl_error(req, "Incorrect strip size.");
1909 				return (-11);
1910 			}
1911 			if (strip > 65535 * sectorsize) {
1912 				gctl_error(req, "Strip size too big.");
1913 				return (-12);
1914 			}
1915 			strip = *striparg;
1916 		}
1917 
1918 		/* Round offset up to strip. */
1919 		if (off % strip != 0) {
1920 			size -= strip - off % strip;
1921 			off += strip - off % strip;
1922 		}
1923 
1924 		/* Handle size argument. */
1925 		len = sizeof(*sizearg);
1926 		sizearg = gctl_get_param(req, "size", &len);
1927 		if (sizearg != NULL && len == sizeof(*sizearg) &&
1928 		    *sizearg > 0) {
1929 			if (*sizearg > size) {
1930 				gctl_error(req, "Size too big %lld > %lld.",
1931 				    (long long)*sizearg, (long long)size);
1932 				return (-9);
1933 			}
1934 			size = *sizearg;
1935 		}
1936 
1937 		/* Round size down to strip or sector. */
1938 		if (level == G_RAID_VOLUME_RL_RAID1)
1939 			size -= (size % sectorsize);
1940 		else
1941 			size -= (size % strip);
1942 		if (size <= 0) {
1943 			gctl_error(req, "Size too small.");
1944 			return (-13);
1945 		}
1946 		if (size > 0xffffffffllu * sectorsize) {
1947 			gctl_error(req, "Size too big.");
1948 			return (-14);
1949 		}
1950 
1951 		/* We have all we need, create things: volume, ... */
1952 		vol = g_raid_create_volume(sc, volname, -1);
1953 		pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO);
1954 		pv->pv_volume_pos = i;
1955 		vol->v_md_data = pv;
1956 		vol->v_raid_level = level;
1957 		vol->v_raid_level_qualifier = qual;
1958 		vol->v_strip_size = strip;
1959 		vol->v_disks_count = numdisks;
1960 		if (level == G_RAID_VOLUME_RL_RAID0)
1961 			vol->v_mediasize = size * numdisks;
1962 		else if (level == G_RAID_VOLUME_RL_RAID1)
1963 			vol->v_mediasize = size;
1964 		else if (level == G_RAID_VOLUME_RL_RAID5)
1965 			vol->v_mediasize = size * (numdisks - 1);
1966 		else { /* RAID1E */
1967 			vol->v_mediasize = ((size * numdisks) / strip / 2) *
1968 			    strip;
1969 		}
1970 		vol->v_sectorsize = sectorsize;
1971 		g_raid_start_volume(vol);
1972 
1973 		/* , and subdisks. */
1974 		for (i = 0; i < numdisks; i++) {
1975 			disk = vol1->v_subdisks[i].sd_disk;
1976 			sd = &vol->v_subdisks[i];
1977 			sd->sd_disk = disk;
1978 			sd->sd_offset = off;
1979 			sd->sd_size = size;
1980 			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1981 			if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
1982 				if (level == G_RAID_VOLUME_RL_RAID5)
1983 					g_raid_change_subdisk_state(sd,
1984 					    G_RAID_SUBDISK_S_UNINITIALIZED);
1985 				else
1986 					g_raid_change_subdisk_state(sd,
1987 					    G_RAID_SUBDISK_S_ACTIVE);
1988 				g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1989 				    G_RAID_EVENT_SUBDISK);
1990 			}
1991 		}
1992 
1993 		/* Write metadata based on created entities. */
1994 		g_raid_md_write_intel(md, NULL, NULL, NULL);
1995 
1996 		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1997 		    G_RAID_EVENT_VOLUME);
1998 		return (0);
1999 	}
2000 	if (strcmp(verb, "delete") == 0) {
2001 
2002 		nodename = gctl_get_asciiparam(req, "arg0");
2003 		if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0)
2004 			nodename = NULL;
2005 
2006 		/* Full node destruction. */
2007 		if (*nargs == 1 && nodename != NULL) {
2008 			/* Check if some volume is still open. */
2009 			force = gctl_get_paraml(req, "force", sizeof(*force));
2010 			if (force != NULL && *force == 0 &&
2011 			    g_raid_nopens(sc) != 0) {
2012 				gctl_error(req, "Some volume is still open.");
2013 				return (-4);
2014 			}
2015 
2016 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2017 				if (disk->d_consumer)
2018 					intel_meta_erase(disk->d_consumer);
2019 			}
2020 			g_raid_destroy_node(sc, 0);
2021 			return (0);
2022 		}
2023 
2024 		/* Destroy specified volume. If it was last - all node. */
2025 		if (*nargs > 2) {
2026 			gctl_error(req, "Invalid number of arguments.");
2027 			return (-1);
2028 		}
2029 		volname = gctl_get_asciiparam(req,
2030 		    nodename != NULL ? "arg1" : "arg0");
2031 		if (volname == NULL) {
2032 			gctl_error(req, "No volume name.");
2033 			return (-2);
2034 		}
2035 
2036 		/* Search for volume. */
2037 		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2038 			if (strcmp(vol->v_name, volname) == 0)
2039 				break;
2040 			pp = vol->v_provider;
2041 			if (pp == NULL)
2042 				continue;
2043 			if (strcmp(pp->name, volname) == 0)
2044 				break;
2045 			if (strncmp(pp->name, "raid/", 5) == 0 &&
2046 			    strcmp(pp->name + 5, volname) == 0)
2047 				break;
2048 		}
2049 		if (vol == NULL) {
2050 			i = strtol(volname, &tmp, 10);
2051 			if (verb != volname && tmp[0] == 0) {
2052 				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2053 					if (vol->v_global_id == i)
2054 						break;
2055 				}
2056 			}
2057 		}
2058 		if (vol == NULL) {
2059 			gctl_error(req, "Volume '%s' not found.", volname);
2060 			return (-3);
2061 		}
2062 
2063 		/* Check if volume is still open. */
2064 		force = gctl_get_paraml(req, "force", sizeof(*force));
2065 		if (force != NULL && *force == 0 &&
2066 		    vol->v_provider_open != 0) {
2067 			gctl_error(req, "Volume is still open.");
2068 			return (-4);
2069 		}
2070 
2071 		/* Destroy volume and potentially node. */
2072 		i = 0;
2073 		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
2074 			i++;
2075 		if (i >= 2) {
2076 			g_raid_destroy_volume(vol);
2077 			g_raid_md_write_intel(md, NULL, NULL, NULL);
2078 		} else {
2079 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2080 				if (disk->d_consumer)
2081 					intel_meta_erase(disk->d_consumer);
2082 			}
2083 			g_raid_destroy_node(sc, 0);
2084 		}
2085 		return (0);
2086 	}
2087 	if (strcmp(verb, "remove") == 0 ||
2088 	    strcmp(verb, "fail") == 0) {
2089 		if (*nargs < 2) {
2090 			gctl_error(req, "Invalid number of arguments.");
2091 			return (-1);
2092 		}
2093 		for (i = 1; i < *nargs; i++) {
2094 			snprintf(arg, sizeof(arg), "arg%d", i);
2095 			diskname = gctl_get_asciiparam(req, arg);
2096 			if (diskname == NULL) {
2097 				gctl_error(req, "No disk name (%s).", arg);
2098 				error = -2;
2099 				break;
2100 			}
2101 			if (strncmp(diskname, "/dev/", 5) == 0)
2102 				diskname += 5;
2103 
2104 			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2105 				if (disk->d_consumer != NULL &&
2106 				    disk->d_consumer->provider != NULL &&
2107 				    strcmp(disk->d_consumer->provider->name,
2108 				     diskname) == 0)
2109 					break;
2110 			}
2111 			if (disk == NULL) {
2112 				gctl_error(req, "Disk '%s' not found.",
2113 				    diskname);
2114 				error = -3;
2115 				break;
2116 			}
2117 
2118 			if (strcmp(verb, "fail") == 0) {
2119 				g_raid_md_fail_disk_intel(md, NULL, disk);
2120 				continue;
2121 			}
2122 
2123 			pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2124 
2125 			/* Erase metadata on deleting disk. */
2126 			intel_meta_erase(disk->d_consumer);
2127 
2128 			/* If disk was assigned, just update statuses. */
2129 			if (pd->pd_disk_pos >= 0) {
2130 				g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
2131 				g_raid_kill_consumer(sc, disk->d_consumer);
2132 				disk->d_consumer = NULL;
2133 				TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
2134 					g_raid_change_subdisk_state(sd,
2135 					    G_RAID_SUBDISK_S_NONE);
2136 					g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
2137 					    G_RAID_EVENT_SUBDISK);
2138 				}
2139 			} else {
2140 				/* Otherwise -- delete. */
2141 				g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
2142 				g_raid_destroy_disk(disk);
2143 			}
2144 		}
2145 
2146 		/* Write updated metadata to remaining disks. */
2147 		g_raid_md_write_intel(md, NULL, NULL, NULL);
2148 
2149 		/* Check if anything left except placeholders. */
2150 		if (g_raid_ndisks(sc, -1) ==
2151 		    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2152 			g_raid_destroy_node(sc, 0);
2153 		else
2154 			g_raid_md_intel_refill(sc);
2155 		return (error);
2156 	}
2157 	if (strcmp(verb, "insert") == 0) {
2158 		if (*nargs < 2) {
2159 			gctl_error(req, "Invalid number of arguments.");
2160 			return (-1);
2161 		}
2162 		update = 0;
2163 		for (i = 1; i < *nargs; i++) {
2164 			/* Get disk name. */
2165 			snprintf(arg, sizeof(arg), "arg%d", i);
2166 			diskname = gctl_get_asciiparam(req, arg);
2167 			if (diskname == NULL) {
2168 				gctl_error(req, "No disk name (%s).", arg);
2169 				error = -3;
2170 				break;
2171 			}
2172 
2173 			/* Try to find provider with specified name. */
2174 			g_topology_lock();
2175 			cp = g_raid_open_consumer(sc, diskname);
2176 			if (cp == NULL) {
2177 				gctl_error(req, "Can't open disk '%s'.",
2178 				    diskname);
2179 				g_topology_unlock();
2180 				error = -4;
2181 				break;
2182 			}
2183 			pp = cp->provider;
2184 			g_topology_unlock();
2185 
2186 			/* Read disk serial. */
2187 			error = g_raid_md_get_label(cp,
2188 			    &serial[0], INTEL_SERIAL_LEN);
2189 			if (error != 0) {
2190 				gctl_error(req,
2191 				    "Can't get serial for provider '%s'.",
2192 				    diskname);
2193 				g_raid_kill_consumer(sc, cp);
2194 				error = -7;
2195 				break;
2196 			}
2197 
2198 			pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
2199 			pd->pd_disk_pos = -1;
2200 
2201 			disk = g_raid_create_disk(sc);
2202 			disk->d_consumer = cp;
2203 			disk->d_md_data = (void *)pd;
2204 			cp->private = disk;
2205 
2206 			g_raid_get_disk_info(disk);
2207 
2208 			memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
2209 			    INTEL_SERIAL_LEN);
2210 			intel_set_disk_sectors(&pd->pd_disk_meta,
2211 			    pp->mediasize / pp->sectorsize);
2212 			pd->pd_disk_meta.id = 0;
2213 			pd->pd_disk_meta.flags = INTEL_F_SPARE;
2214 
2215 			/* Welcome the "new" disk. */
2216 			update += g_raid_md_intel_start_disk(disk);
2217 			if (disk->d_state == G_RAID_DISK_S_SPARE) {
2218 				intel_meta_write_spare(cp, &pd->pd_disk_meta);
2219 				g_raid_destroy_disk(disk);
2220 			} else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2221 				gctl_error(req, "Disk '%s' doesn't fit.",
2222 				    diskname);
2223 				g_raid_destroy_disk(disk);
2224 				error = -8;
2225 				break;
2226 			}
2227 		}
2228 
2229 		/* Write new metadata if we changed something. */
2230 		if (update)
2231 			g_raid_md_write_intel(md, NULL, NULL, NULL);
2232 		return (error);
2233 	}
2234 	return (-100);
2235 }
2236 
2237 static int
2238 g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2239     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2240 {
2241 	struct g_raid_softc *sc;
2242 	struct g_raid_volume *vol;
2243 	struct g_raid_subdisk *sd;
2244 	struct g_raid_disk *disk;
2245 	struct g_raid_md_intel_object *mdi;
2246 	struct g_raid_md_intel_pervolume *pv;
2247 	struct g_raid_md_intel_perdisk *pd;
2248 	struct intel_raid_conf *meta;
2249 	struct intel_raid_vol *mvol;
2250 	struct intel_raid_map *mmap0, *mmap1;
2251 	off_t sectorsize = 512, pos;
2252 	const char *version, *cv;
2253 	int vi, sdi, numdisks, len, state, stale;
2254 
2255 	sc = md->mdo_softc;
2256 	mdi = (struct g_raid_md_intel_object *)md;
2257 
2258 	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2259 		return (0);
2260 
2261 	/* Bump generation. Newly written metadata may differ from previous. */
2262 	mdi->mdio_generation++;
2263 
2264 	/* Count number of disks. */
2265 	numdisks = 0;
2266 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2267 		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2268 		if (pd->pd_disk_pos < 0)
2269 			continue;
2270 		numdisks++;
2271 		if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
2272 			pd->pd_disk_meta.flags =
2273 			    INTEL_F_ONLINE | INTEL_F_ASSIGNED;
2274 		} else if (disk->d_state == G_RAID_DISK_S_FAILED) {
2275 			pd->pd_disk_meta.flags = INTEL_F_FAILED |
2276 			    INTEL_F_ASSIGNED;
2277 		} else if (disk->d_state == G_RAID_DISK_S_DISABLED) {
2278 			pd->pd_disk_meta.flags = INTEL_F_FAILED |
2279 			    INTEL_F_ASSIGNED | INTEL_F_DISABLED;
2280 		} else {
2281 			if (!(pd->pd_disk_meta.flags & INTEL_F_DISABLED))
2282 				pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
2283 			if (pd->pd_disk_meta.id != 0xffffffff) {
2284 				pd->pd_disk_meta.id = 0xffffffff;
2285 				len = strlen(pd->pd_disk_meta.serial);
2286 				len = min(len, INTEL_SERIAL_LEN - 3);
2287 				strcpy(pd->pd_disk_meta.serial + len, ":0");
2288 			}
2289 		}
2290 	}
2291 
2292 	/* Fill anchor and disks. */
2293 	meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
2294 	    M_MD_INTEL, M_WAITOK | M_ZERO);
2295 	memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
2296 	meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
2297 	meta->config_id = mdi->mdio_config_id;
2298 	meta->orig_config_id = mdi->mdio_orig_config_id;
2299 	meta->generation = mdi->mdio_generation;
2300 	meta->attributes = INTEL_ATTR_CHECKSUM;
2301 	meta->total_disks = numdisks;
2302 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2303 		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2304 		if (pd->pd_disk_pos < 0)
2305 			continue;
2306 		meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
2307 		if (pd->pd_disk_meta.sectors_hi != 0)
2308 			meta->attributes |= INTEL_ATTR_2TB_DISK;
2309 	}
2310 
2311 	/* Fill volumes and maps. */
2312 	vi = 0;
2313 	version = INTEL_VERSION_1000;
2314 	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2315 		pv = vol->v_md_data;
2316 		if (vol->v_stopping)
2317 			continue;
2318 		mvol = intel_get_volume(meta, vi);
2319 
2320 		/* New metadata may have different volumes order. */
2321 		pv->pv_volume_pos = vi;
2322 
2323 		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2324 			sd = &vol->v_subdisks[sdi];
2325 			if (sd->sd_disk != NULL)
2326 				break;
2327 		}
2328 		if (sdi >= vol->v_disks_count)
2329 			panic("No any filled subdisk in volume");
2330 		if (vol->v_mediasize >= 0x20000000000llu)
2331 			meta->attributes |= INTEL_ATTR_2TB;
2332 		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2333 			meta->attributes |= INTEL_ATTR_RAID0;
2334 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2335 			meta->attributes |= INTEL_ATTR_RAID1;
2336 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2337 			meta->attributes |= INTEL_ATTR_RAID5;
2338 		else if ((vol->v_disks_count & 1) == 0)
2339 			meta->attributes |= INTEL_ATTR_RAID10;
2340 		else
2341 			meta->attributes |= INTEL_ATTR_RAID1E;
2342 		if (pv->pv_cng)
2343 			meta->attributes |= INTEL_ATTR_RAIDCNG;
2344 		if (vol->v_strip_size > 131072)
2345 			meta->attributes |= INTEL_ATTR_EXT_STRIP;
2346 
2347 		if (pv->pv_cng)
2348 			cv = INTEL_VERSION_1206;
2349 		else if (vol->v_disks_count > 4)
2350 			cv = INTEL_VERSION_1204;
2351 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2352 			cv = INTEL_VERSION_1202;
2353 		else if (vol->v_disks_count > 2)
2354 			cv = INTEL_VERSION_1201;
2355 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2356 			cv = INTEL_VERSION_1100;
2357 		else
2358 			cv = INTEL_VERSION_1000;
2359 		if (strcmp(cv, version) > 0)
2360 			version = cv;
2361 
2362 		strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
2363 		mvol->total_sectors = vol->v_mediasize / sectorsize;
2364 		mvol->state = (INTEL_ST_READ_COALESCING |
2365 		    INTEL_ST_WRITE_COALESCING);
2366 		mvol->tid = vol->v_global_id + 1;
2367 		if (pv->pv_cng) {
2368 			mvol->state |= INTEL_ST_CLONE_N_GO;
2369 			if (pv->pv_cng_man_sync)
2370 				mvol->state |= INTEL_ST_CLONE_MAN_SYNC;
2371 			mvol->cng_master_disk = pv->pv_cng_master_disk;
2372 			if (vol->v_subdisks[pv->pv_cng_master_disk].sd_state ==
2373 			    G_RAID_SUBDISK_S_NONE)
2374 				mvol->cng_state = INTEL_CNGST_MASTER_MISSING;
2375 			else if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL)
2376 				mvol->cng_state = INTEL_CNGST_NEEDS_UPDATE;
2377 			else
2378 				mvol->cng_state = INTEL_CNGST_UPDATED;
2379 		}
2380 
2381 		/* Check for any recovery in progress. */
2382 		state = G_RAID_SUBDISK_S_ACTIVE;
2383 		pos = 0x7fffffffffffffffllu;
2384 		stale = 0;
2385 		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2386 			sd = &vol->v_subdisks[sdi];
2387 			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
2388 				state = G_RAID_SUBDISK_S_REBUILD;
2389 			else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
2390 			    state != G_RAID_SUBDISK_S_REBUILD)
2391 				state = G_RAID_SUBDISK_S_RESYNC;
2392 			else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
2393 				stale = 1;
2394 			if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2395 			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
2396 			     sd->sd_rebuild_pos < pos)
2397 			        pos = sd->sd_rebuild_pos;
2398 		}
2399 		if (state == G_RAID_SUBDISK_S_REBUILD) {
2400 			mvol->migr_state = 1;
2401 			mvol->migr_type = INTEL_MT_REBUILD;
2402 		} else if (state == G_RAID_SUBDISK_S_RESYNC) {
2403 			mvol->migr_state = 1;
2404 			/* mvol->migr_type = INTEL_MT_REPAIR; */
2405 			mvol->migr_type = INTEL_MT_VERIFY;
2406 			mvol->state |= INTEL_ST_VERIFY_AND_FIX;
2407 		} else
2408 			mvol->migr_state = 0;
2409 		mvol->dirty = (vol->v_dirty || stale);
2410 
2411 		mmap0 = intel_get_map(mvol, 0);
2412 
2413 		/* Write map / common part of two maps. */
2414 		intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
2415 		intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
2416 		mmap0->strip_sectors = vol->v_strip_size / sectorsize;
2417 		if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
2418 			mmap0->status = INTEL_S_FAILURE;
2419 		else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
2420 			mmap0->status = INTEL_S_DEGRADED;
2421 		else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED)
2422 		    == g_raid_nsubdisks(vol, -1))
2423 			mmap0->status = INTEL_S_UNINITIALIZED;
2424 		else
2425 			mmap0->status = INTEL_S_READY;
2426 		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2427 			mmap0->type = INTEL_T_RAID0;
2428 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
2429 		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2430 			mmap0->type = INTEL_T_RAID1;
2431 		else
2432 			mmap0->type = INTEL_T_RAID5;
2433 		mmap0->total_disks = vol->v_disks_count;
2434 		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2435 			mmap0->total_domains = vol->v_disks_count;
2436 		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2437 			mmap0->total_domains = 2;
2438 		else
2439 			mmap0->total_domains = 1;
2440 		intel_set_map_stripe_count(mmap0,
2441 		    sd->sd_size / vol->v_strip_size / mmap0->total_domains);
2442 		mmap0->failed_disk_num = 0xff;
2443 		mmap0->ddf = 1;
2444 
2445 		/* If there are two maps - copy common and update. */
2446 		if (mvol->migr_state) {
2447 			intel_set_vol_curr_migr_unit(mvol,
2448 			    pos / vol->v_strip_size / mmap0->total_domains);
2449 			mmap1 = intel_get_map(mvol, 1);
2450 			memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
2451 			mmap0->status = INTEL_S_READY;
2452 		} else
2453 			mmap1 = NULL;
2454 
2455 		/* Write disk indexes and put rebuild flags. */
2456 		for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2457 			sd = &vol->v_subdisks[sdi];
2458 			pd = (struct g_raid_md_intel_perdisk *)
2459 			    sd->sd_disk->d_md_data;
2460 			mmap0->disk_idx[sdi] = pd->pd_disk_pos;
2461 			if (mvol->migr_state)
2462 				mmap1->disk_idx[sdi] = pd->pd_disk_pos;
2463 			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2464 			    sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2465 				mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2466 			} else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
2467 			    sd->sd_state != G_RAID_SUBDISK_S_STALE &&
2468 			    sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) {
2469 				mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
2470 				if (mvol->migr_state)
2471 					mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2472 			}
2473 			if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
2474 			     sd->sd_state == G_RAID_SUBDISK_S_FAILED ||
2475 			     sd->sd_state == G_RAID_SUBDISK_S_REBUILD) &&
2476 			    mmap0->failed_disk_num == 0xff) {
2477 				mmap0->failed_disk_num = sdi;
2478 				if (mvol->migr_state)
2479 					mmap1->failed_disk_num = sdi;
2480 			}
2481 		}
2482 		vi++;
2483 	}
2484 	meta->total_volumes = vi;
2485 	if (vi > 1 || meta->attributes &
2486 	     (INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK | INTEL_ATTR_2TB))
2487 		version = INTEL_VERSION_1300;
2488 	if (strcmp(version, INTEL_VERSION_1300) < 0)
2489 		meta->attributes &= INTEL_ATTR_CHECKSUM;
2490 	memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
2491 
2492 	/* We are done. Print meta data and store them to disks. */
2493 	g_raid_md_intel_print(meta);
2494 	if (mdi->mdio_meta != NULL)
2495 		free(mdi->mdio_meta, M_MD_INTEL);
2496 	mdi->mdio_meta = meta;
2497 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2498 		pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2499 		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
2500 			continue;
2501 		if (pd->pd_meta != NULL) {
2502 			free(pd->pd_meta, M_MD_INTEL);
2503 			pd->pd_meta = NULL;
2504 		}
2505 		pd->pd_meta = intel_meta_copy(meta);
2506 		intel_meta_write(disk->d_consumer, meta);
2507 	}
2508 	return (0);
2509 }
2510 
2511 static int
2512 g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
2513     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2514 {
2515 	struct g_raid_softc *sc;
2516 	struct g_raid_md_intel_object *mdi;
2517 	struct g_raid_md_intel_perdisk *pd;
2518 	struct g_raid_subdisk *sd;
2519 
2520 	sc = md->mdo_softc;
2521 	mdi = (struct g_raid_md_intel_object *)md;
2522 	pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
2523 
2524 	/* We can't fail disk that is not a part of array now. */
2525 	if (pd->pd_disk_pos < 0)
2526 		return (-1);
2527 
2528 	/*
2529 	 * Mark disk as failed in metadata and try to write that metadata
2530 	 * to the disk itself to prevent it's later resurrection as STALE.
2531 	 */
2532 	mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
2533 	pd->pd_disk_meta.flags = INTEL_F_FAILED;
2534 	g_raid_md_intel_print(mdi->mdio_meta);
2535 	if (tdisk->d_consumer)
2536 		intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
2537 
2538 	/* Change states. */
2539 	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
2540 	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
2541 		g_raid_change_subdisk_state(sd,
2542 		    G_RAID_SUBDISK_S_FAILED);
2543 		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
2544 		    G_RAID_EVENT_SUBDISK);
2545 	}
2546 
2547 	/* Write updated metadata to remaining disks. */
2548 	g_raid_md_write_intel(md, NULL, NULL, tdisk);
2549 
2550 	/* Check if anything left except placeholders. */
2551 	if (g_raid_ndisks(sc, -1) ==
2552 	    g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2553 		g_raid_destroy_node(sc, 0);
2554 	else
2555 		g_raid_md_intel_refill(sc);
2556 	return (0);
2557 }
2558 
2559 static int
2560 g_raid_md_free_disk_intel(struct g_raid_md_object *md,
2561     struct g_raid_disk *disk)
2562 {
2563 	struct g_raid_md_intel_perdisk *pd;
2564 
2565 	pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2566 	if (pd->pd_meta != NULL) {
2567 		free(pd->pd_meta, M_MD_INTEL);
2568 		pd->pd_meta = NULL;
2569 	}
2570 	free(pd, M_MD_INTEL);
2571 	disk->d_md_data = NULL;
2572 	return (0);
2573 }
2574 
2575 static int
2576 g_raid_md_free_volume_intel(struct g_raid_md_object *md,
2577     struct g_raid_volume *vol)
2578 {
2579 	struct g_raid_md_intel_pervolume *pv;
2580 
2581 	pv = (struct g_raid_md_intel_pervolume *)vol->v_md_data;
2582 	free(pv, M_MD_INTEL);
2583 	vol->v_md_data = NULL;
2584 	return (0);
2585 }
2586 
2587 static int
2588 g_raid_md_free_intel(struct g_raid_md_object *md)
2589 {
2590 	struct g_raid_md_intel_object *mdi;
2591 
2592 	mdi = (struct g_raid_md_intel_object *)md;
2593 	if (!mdi->mdio_started) {
2594 		mdi->mdio_started = 0;
2595 		callout_stop(&mdi->mdio_start_co);
2596 		G_RAID_DEBUG1(1, md->mdo_softc,
2597 		    "root_mount_rel %p", mdi->mdio_rootmount);
2598 		root_mount_rel(mdi->mdio_rootmount);
2599 		mdi->mdio_rootmount = NULL;
2600 	}
2601 	if (mdi->mdio_meta != NULL) {
2602 		free(mdi->mdio_meta, M_MD_INTEL);
2603 		mdi->mdio_meta = NULL;
2604 	}
2605 	return (0);
2606 }
2607 
/*
 * Expand G_RAID_MD_DECLARE for this module with the identifier `intel' and
 * the name string "Intel".
 * NOTE(review): the macro is defined outside this file (presumably in
 * g_raid.h); confirm there exactly what it declares and registers.
 */
2608 G_RAID_MD_DECLARE(intel, "Intel");
2609