xref: /linux/drivers/md/raid0.c (revision 2975489458c59ce2e348b1b3aef5d8d2acb5cc8d)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3    raid0.c : Multiple Devices driver for Linux
4 	     Copyright (C) 1994-96 Marc ZYNGIER
5 	     <zyngier@ufr-info-p7.ibp.fr> or
6 	     <maz@gloups.fdn.fr>
7 	     Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
8 
9    RAID-0 management functions.
10 
11 */
12 
13 #include <linux/blkdev.h>
14 #include <linux/seq_file.h>
15 #include <linux/module.h>
16 #include <linux/slab.h>
17 #include <trace/events/block.h>
18 #include "md.h"
19 #include "raid0.h"
20 #include "raid5.h"
21 
22 static int default_layout = 0;
23 module_param(default_layout, int, 0644);
24 
/*
 * Bit mask of mddev flags that the takeover helpers in this file pass to
 * mddev_clear_unsupported_flags() when an array is converted to RAID0.
 */
#define UNSUPPORTED_MDDEV_FLAGS		\
	((1L << MD_HAS_JOURNAL) |	\
	 (1L << MD_JOURNAL_CLEAN) |	\
	 (1L << MD_FAILFAST_SUPPORTED) |\
	 (1L << MD_HAS_PPL) |		\
	 (1L << MD_HAS_MULTIPLE_PPLS))
31 
32 static int raid0_congested(struct mddev *mddev, int bits)
33 {
34 	struct r0conf *conf = mddev->private;
35 	struct md_rdev **devlist = conf->devlist;
36 	int raid_disks = conf->strip_zone[0].nb_dev;
37 	int i, ret = 0;
38 
39 	for (i = 0; i < raid_disks && !ret ; i++) {
40 		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
41 
42 		ret |= bdi_congested(q->backing_dev_info, bits);
43 	}
44 	return ret;
45 }
46 
47 /*
48  * inform the user of the raid configuration
49 */
50 static void dump_zones(struct mddev *mddev)
51 {
52 	int j, k;
53 	sector_t zone_size = 0;
54 	sector_t zone_start = 0;
55 	char b[BDEVNAME_SIZE];
56 	struct r0conf *conf = mddev->private;
57 	int raid_disks = conf->strip_zone[0].nb_dev;
58 	pr_debug("md: RAID0 configuration for %s - %d zone%s\n",
59 		 mdname(mddev),
60 		 conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s");
61 	for (j = 0; j < conf->nr_strip_zones; j++) {
62 		char line[200];
63 		int len = 0;
64 
65 		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
66 			len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
67 					bdevname(conf->devlist[j*raid_disks
68 							       + k]->bdev, b));
69 		pr_debug("md: zone%d=[%s]\n", j, line);
70 
71 		zone_size  = conf->strip_zone[j].zone_end - zone_start;
72 		pr_debug("      zone-offset=%10lluKB, device-offset=%10lluKB, size=%10lluKB\n",
73 			(unsigned long long)zone_start>>1,
74 			(unsigned long long)conf->strip_zone[j].dev_start>>1,
75 			(unsigned long long)zone_size>>1);
76 		zone_start = conf->strip_zone[j].zone_end;
77 	}
78 }
79 
/*
 * create_strip_zones - build the zone table and device list for an array
 * @mddev:        array being assembled or taken over
 * @private_conf: out parameter; receives the new r0conf on success and an
 *                ERR_PTR() value on failure
 *
 * Steps, in order:
 *  1. Round every member's ->sectors down to a multiple of the chunk size,
 *     remember the largest logical block size, and count the number of
 *     distinct (rounded) member sizes; that count is the number of zones.
 *  2. Choose the layout: single-zone arrays always use RAID0_ORIG_LAYOUT,
 *     otherwise mddev->layout or the default_layout module parameter must
 *     name a valid layout.
 *  3. Check that the chunk size is a multiple of the largest block size.
 *  4. Allocate the zone table and a devlist of nr_strip_zones * raid_disks
 *     slots.
 *  5. Fill zone 0 with every member, indexed by its raid slot.
 *  6. For each further zone, collect the members that extend beyond the
 *     previous zone and size the zone by the smallest of them.
 *
 * Returns 0 on success, or -ENOMEM, -ENOTSUPP or -EINVAL on failure, in
 * which case everything allocated here has been freed again.
 */
static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
{
	int i, c, err;
	sector_t curr_zone_end, sectors;
	struct md_rdev *smallest, *rdev1, *rdev2, *rdev, **dev;
	struct strip_zone *zone;
	int cnt;
	char b[BDEVNAME_SIZE];
	char b2[BDEVNAME_SIZE];
	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
	unsigned short blksize = 512;

	*private_conf = ERR_PTR(-ENOMEM);
	if (!conf)
		return -ENOMEM;
	rdev_for_each(rdev1, mddev) {
		pr_debug("md/raid0:%s: looking at %s\n",
			 mdname(mddev),
			 bdevname(rdev1->bdev, b));
		/* c is set to 1 when an earlier member has the same size */
		c = 0;

		/* round size to chunk_size */
		sectors = rdev1->sectors;
		sector_div(sectors, mddev->chunk_sectors);
		rdev1->sectors = sectors * mddev->chunk_sectors;

		blksize = max(blksize, queue_logical_block_size(
				      rdev1->bdev->bd_disk->queue));

		/*
		 * Compare against the members that precede rdev1 in the list;
		 * the walk stops as soon as rdev1 itself is reached.
		 */
		rdev_for_each(rdev2, mddev) {
			pr_debug("md/raid0:%s:   comparing %s(%llu)"
				 " with %s(%llu)\n",
				 mdname(mddev),
				 bdevname(rdev1->bdev,b),
				 (unsigned long long)rdev1->sectors,
				 bdevname(rdev2->bdev,b2),
				 (unsigned long long)rdev2->sectors);
			if (rdev2 == rdev1) {
				pr_debug("md/raid0:%s:   END\n",
					 mdname(mddev));
				break;
			}
			if (rdev2->sectors == rdev1->sectors) {
				/*
				 * Not unique, don't count it as a new
				 * group
				 */
				pr_debug("md/raid0:%s:   EQUAL\n",
					 mdname(mddev));
				c = 1;
				break;
			}
			pr_debug("md/raid0:%s:   NOT EQUAL\n",
				 mdname(mddev));
		}
		if (!c) {
			pr_debug("md/raid0:%s:   ==> UNIQUE\n",
				 mdname(mddev));
			conf->nr_strip_zones++;
			pr_debug("md/raid0:%s: %d zones\n",
				 mdname(mddev), conf->nr_strip_zones);
		}
	}
	pr_debug("md/raid0:%s: FINAL %d zones\n",
		 mdname(mddev), conf->nr_strip_zones);

	/*
	 * Pick the layout: forced to RAID0_ORIG_LAYOUT for a single zone,
	 * otherwise taken from the array and then from the module parameter.
	 */
	if (conf->nr_strip_zones == 1) {
		conf->layout = RAID0_ORIG_LAYOUT;
	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
		conf->layout = mddev->layout;
	} else if (default_layout == RAID0_ORIG_LAYOUT ||
		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
		conf->layout = default_layout;
	} else {
		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
		       mdname(mddev));
		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
		err = -ENOTSUPP;
		goto abort;
	}
	/*
	 * now since we have the hard sector sizes, we can make sure
	 * chunk size is a multiple of that sector size
	 */
	if ((mddev->chunk_sectors << 9) % blksize) {
		pr_warn("md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
			mdname(mddev),
			mddev->chunk_sectors << 9, blksize);
		err = -EINVAL;
		goto abort;
	}

	err = -ENOMEM;
	conf->strip_zone = kcalloc(conf->nr_strip_zones,
				   sizeof(struct strip_zone),
				   GFP_KERNEL);
	if (!conf->strip_zone)
		goto abort;
	/* one row of raid_disks device pointers per zone, zero-filled */
	conf->devlist = kzalloc(array3_size(sizeof(struct md_rdev *),
					    conf->nr_strip_zones,
					    mddev->raid_disks),
				GFP_KERNEL);
	if (!conf->devlist)
		goto abort;

	/* The first zone must contain all devices, so here we check that
	 * there is a proper alignment of slots to devices and find them all
	 */
	zone = &conf->strip_zone[0];
	cnt = 0;
	smallest = NULL;
	dev = conf->devlist;
	err = -EINVAL;
	rdev_for_each(rdev1, mddev) {
		int j = rdev1->raid_disk;

		if (mddev->level == 10) {
			/* taking over a raid10-n2 array */
			j /= 2;
			rdev1->new_raid_disk = j;
		}

		if (mddev->level == 1) {
			/* taking over a raid1 array-
			 * we have only one active disk
			 */
			j = 0;
			rdev1->new_raid_disk = j;
		}

		if (j < 0) {
			pr_warn("md/raid0:%s: remove inactive devices before converting to RAID0\n",
				mdname(mddev));
			goto abort;
		}
		if (j >= mddev->raid_disks) {
			pr_warn("md/raid0:%s: bad disk number %d - aborting!\n",
				mdname(mddev), j);
			goto abort;
		}
		/* devlist was zero-filled, so a non-NULL slot is a duplicate */
		if (dev[j]) {
			pr_warn("md/raid0:%s: multiple devices for %d - aborting!\n",
				mdname(mddev), j);
			goto abort;
		}
		dev[j] = rdev1;

		if (!smallest || (rdev1->sectors < smallest->sectors))
			smallest = rdev1;
		cnt++;
	}
	if (cnt != mddev->raid_disks) {
		pr_warn("md/raid0:%s: too few disks (%d of %d) - aborting!\n",
			mdname(mddev), cnt, mddev->raid_disks);
		goto abort;
	}
	/* zone 0 stripes across every member up to the smallest member's size */
	zone->nb_dev = cnt;
	zone->zone_end = smallest->sectors * cnt;

	curr_zone_end = zone->zone_end;

	/* now do the other zones */
	for (i = 1; i < conf->nr_strip_zones; i++)
	{
		int j;

		zone = conf->strip_zone + i;
		dev = conf->devlist + i * mddev->raid_disks;

		pr_debug("md/raid0:%s: zone %d\n", mdname(mddev), i);
		/* this zone starts on each member where the previous one ended */
		zone->dev_start = smallest->sectors;
		smallest = NULL;
		c = 0;

		/* keep only the members that reach beyond dev_start */
		for (j=0; j<cnt; j++) {
			rdev = conf->devlist[j];
			if (rdev->sectors <= zone->dev_start) {
				pr_debug("md/raid0:%s: checking %s ... nope\n",
					 mdname(mddev),
					 bdevname(rdev->bdev, b));
				continue;
			}
			pr_debug("md/raid0:%s: checking %s ..."
				 " contained as device %d\n",
				 mdname(mddev),
				 bdevname(rdev->bdev, b), c);
			dev[c] = rdev;
			c++;
			if (!smallest || rdev->sectors < smallest->sectors) {
				smallest = rdev;
				pr_debug("md/raid0:%s:  (%llu) is smallest!.\n",
					 mdname(mddev),
					 (unsigned long long)rdev->sectors);
			}
		}

		zone->nb_dev = c;
		/* zone size: space between dev_start and the smallest member, times members */
		sectors = (smallest->sectors - zone->dev_start) * c;
		pr_debug("md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
			 mdname(mddev),
			 zone->nb_dev, (unsigned long long)sectors);

		curr_zone_end += sectors;
		zone->zone_end = curr_zone_end;

		pr_debug("md/raid0:%s: current zone start: %llu\n",
			 mdname(mddev),
			 (unsigned long long)smallest->sectors);
	}

	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
	*private_conf = conf;

	return 0;
abort:
	/* kfree(NULL) is fine, so partially built configurations are handled too */
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	*private_conf = ERR_PTR(err);
	return err;
}
302 
303 /* Find the zone which holds a particular offset
304  * Update *sectorp to be an offset in that zone
305  */
306 static struct strip_zone *find_zone(struct r0conf *conf,
307 				    sector_t *sectorp)
308 {
309 	int i;
310 	struct strip_zone *z = conf->strip_zone;
311 	sector_t sector = *sectorp;
312 
313 	for (i = 0; i < conf->nr_strip_zones; i++)
314 		if (sector < z[i].zone_end) {
315 			if (i)
316 				*sectorp = sector - z[i-1].zone_end;
317 			return z + i;
318 		}
319 	BUG();
320 }
321 
322 /*
323  * remaps the bio to the target device. we separate two flows.
324  * power 2 flow and a general flow for the sake of performance
325 */
326 static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
327 				sector_t sector, sector_t *sector_offset)
328 {
329 	unsigned int sect_in_chunk;
330 	sector_t chunk;
331 	struct r0conf *conf = mddev->private;
332 	int raid_disks = conf->strip_zone[0].nb_dev;
333 	unsigned int chunk_sects = mddev->chunk_sectors;
334 
335 	if (is_power_of_2(chunk_sects)) {
336 		int chunksect_bits = ffz(~chunk_sects);
337 		/* find the sector offset inside the chunk */
338 		sect_in_chunk  = sector & (chunk_sects - 1);
339 		sector >>= chunksect_bits;
340 		/* chunk in zone */
341 		chunk = *sector_offset;
342 		/* quotient is the chunk in real device*/
343 		sector_div(chunk, zone->nb_dev << chunksect_bits);
344 	} else{
345 		sect_in_chunk = sector_div(sector, chunk_sects);
346 		chunk = *sector_offset;
347 		sector_div(chunk, chunk_sects * zone->nb_dev);
348 	}
349 	/*
350 	*  position the bio over the real device
351 	*  real sector = chunk in device + starting of zone
352 	*	+ the position in the chunk
353 	*/
354 	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
355 	return conf->devlist[(zone - conf->strip_zone)*raid_disks
356 			     + sector_div(sector, zone->nb_dev)];
357 }
358 
359 static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks)
360 {
361 	sector_t array_sectors = 0;
362 	struct md_rdev *rdev;
363 
364 	WARN_ONCE(sectors || raid_disks,
365 		  "%s does not support generic reshape\n", __func__);
366 
367 	rdev_for_each(rdev, mddev)
368 		array_sectors += (rdev->sectors &
369 				  ~(sector_t)(mddev->chunk_sectors-1));
370 
371 	return array_sectors;
372 }
373 
374 static void raid0_free(struct mddev *mddev, void *priv);
375 
376 static int raid0_run(struct mddev *mddev)
377 {
378 	struct r0conf *conf;
379 	int ret;
380 
381 	if (mddev->chunk_sectors == 0) {
382 		pr_warn("md/raid0:%s: chunk size must be set.\n", mdname(mddev));
383 		return -EINVAL;
384 	}
385 	if (md_check_no_bitmap(mddev))
386 		return -EINVAL;
387 
388 	/* if private is not null, we are here after takeover */
389 	if (mddev->private == NULL) {
390 		ret = create_strip_zones(mddev, &conf);
391 		if (ret < 0)
392 			return ret;
393 		mddev->private = conf;
394 	}
395 	conf = mddev->private;
396 	if (mddev->queue) {
397 		struct md_rdev *rdev;
398 		bool discard_supported = false;
399 
400 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
401 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
402 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
403 		blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
404 
405 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
406 		blk_queue_io_opt(mddev->queue,
407 				 (mddev->chunk_sectors << 9) * mddev->raid_disks);
408 
409 		rdev_for_each(rdev, mddev) {
410 			disk_stack_limits(mddev->gendisk, rdev->bdev,
411 					  rdev->data_offset << 9);
412 			if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
413 				discard_supported = true;
414 		}
415 		if (!discard_supported)
416 			blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
417 		else
418 			blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
419 	}
420 
421 	/* calculate array device size */
422 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
423 
424 	pr_debug("md/raid0:%s: md_size is %llu sectors.\n",
425 		 mdname(mddev),
426 		 (unsigned long long)mddev->array_sectors);
427 
428 	if (mddev->queue) {
429 		/* calculate the max read-ahead size.
430 		 * For read-ahead of large files to be effective, we need to
431 		 * readahead at least twice a whole stripe. i.e. number of devices
432 		 * multiplied by chunk size times 2.
433 		 * If an individual device has an ra_pages greater than the
434 		 * chunk size, then we will not drive that device as hard as it
435 		 * wants.  We consider this a configuration error: a larger
436 		 * chunksize should be used in that case.
437 		 */
438 		int stripe = mddev->raid_disks *
439 			(mddev->chunk_sectors << 9) / PAGE_SIZE;
440 		if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
441 			mddev->queue->backing_dev_info->ra_pages = 2* stripe;
442 	}
443 
444 	dump_zones(mddev);
445 
446 	ret = md_integrity_register(mddev);
447 
448 	return ret;
449 }
450 
451 static void raid0_free(struct mddev *mddev, void *priv)
452 {
453 	struct r0conf *conf = priv;
454 
455 	kfree(conf->strip_zone);
456 	kfree(conf->devlist);
457 	kfree(conf);
458 }
459 
460 /*
461  * Is io distribute over 1 or more chunks ?
462 */
463 static inline int is_io_in_chunk_boundary(struct mddev *mddev,
464 			unsigned int chunk_sects, struct bio *bio)
465 {
466 	if (likely(is_power_of_2(chunk_sects))) {
467 		return chunk_sects >=
468 			((bio->bi_iter.bi_sector & (chunk_sects-1))
469 					+ bio_sectors(bio));
470 	} else{
471 		sector_t sector = bio->bi_iter.bi_sector;
472 		return chunk_sects >= (sector_div(sector, chunk_sects)
473 						+ bio_sectors(bio));
474 	}
475 }
476 
477 static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
478 {
479 	struct r0conf *conf = mddev->private;
480 	struct strip_zone *zone;
481 	sector_t start = bio->bi_iter.bi_sector;
482 	sector_t end;
483 	unsigned int stripe_size;
484 	sector_t first_stripe_index, last_stripe_index;
485 	sector_t start_disk_offset;
486 	unsigned int start_disk_index;
487 	sector_t end_disk_offset;
488 	unsigned int end_disk_index;
489 	unsigned int disk;
490 
491 	zone = find_zone(conf, &start);
492 
493 	if (bio_end_sector(bio) > zone->zone_end) {
494 		struct bio *split = bio_split(bio,
495 			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
496 			&mddev->bio_set);
497 		bio_chain(split, bio);
498 		generic_make_request(bio);
499 		bio = split;
500 		end = zone->zone_end;
501 	} else
502 		end = bio_end_sector(bio);
503 
504 	if (zone != conf->strip_zone)
505 		end = end - zone[-1].zone_end;
506 
507 	/* Now start and end is the offset in zone */
508 	stripe_size = zone->nb_dev * mddev->chunk_sectors;
509 
510 	first_stripe_index = start;
511 	sector_div(first_stripe_index, stripe_size);
512 	last_stripe_index = end;
513 	sector_div(last_stripe_index, stripe_size);
514 
515 	start_disk_index = (int)(start - first_stripe_index * stripe_size) /
516 		mddev->chunk_sectors;
517 	start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
518 		mddev->chunk_sectors) +
519 		first_stripe_index * mddev->chunk_sectors;
520 	end_disk_index = (int)(end - last_stripe_index * stripe_size) /
521 		mddev->chunk_sectors;
522 	end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
523 		mddev->chunk_sectors) +
524 		last_stripe_index * mddev->chunk_sectors;
525 
526 	for (disk = 0; disk < zone->nb_dev; disk++) {
527 		sector_t dev_start, dev_end;
528 		struct bio *discard_bio = NULL;
529 		struct md_rdev *rdev;
530 
531 		if (disk < start_disk_index)
532 			dev_start = (first_stripe_index + 1) *
533 				mddev->chunk_sectors;
534 		else if (disk > start_disk_index)
535 			dev_start = first_stripe_index * mddev->chunk_sectors;
536 		else
537 			dev_start = start_disk_offset;
538 
539 		if (disk < end_disk_index)
540 			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
541 		else if (disk > end_disk_index)
542 			dev_end = last_stripe_index * mddev->chunk_sectors;
543 		else
544 			dev_end = end_disk_offset;
545 
546 		if (dev_end <= dev_start)
547 			continue;
548 
549 		rdev = conf->devlist[(zone - conf->strip_zone) *
550 			conf->strip_zone[0].nb_dev + disk];
551 		if (__blkdev_issue_discard(rdev->bdev,
552 			dev_start + zone->dev_start + rdev->data_offset,
553 			dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
554 		    !discard_bio)
555 			continue;
556 		bio_chain(discard_bio, bio);
557 		bio_clone_blkg_association(discard_bio, bio);
558 		if (mddev->gendisk)
559 			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
560 				discard_bio, disk_devt(mddev->gendisk),
561 				bio->bi_iter.bi_sector);
562 		generic_make_request(discard_bio);
563 	}
564 	bio_endio(bio);
565 }
566 
567 static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
568 {
569 	struct r0conf *conf = mddev->private;
570 	struct strip_zone *zone;
571 	struct md_rdev *tmp_dev;
572 	sector_t bio_sector;
573 	sector_t sector;
574 	sector_t orig_sector;
575 	unsigned chunk_sects;
576 	unsigned sectors;
577 
578 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
579 		md_flush_request(mddev, bio);
580 		return true;
581 	}
582 
583 	if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
584 		raid0_handle_discard(mddev, bio);
585 		return true;
586 	}
587 
588 	bio_sector = bio->bi_iter.bi_sector;
589 	sector = bio_sector;
590 	chunk_sects = mddev->chunk_sectors;
591 
592 	sectors = chunk_sects -
593 		(likely(is_power_of_2(chunk_sects))
594 		 ? (sector & (chunk_sects-1))
595 		 : sector_div(sector, chunk_sects));
596 
597 	/* Restore due to sector_div */
598 	sector = bio_sector;
599 
600 	if (sectors < bio_sectors(bio)) {
601 		struct bio *split = bio_split(bio, sectors, GFP_NOIO,
602 					      &mddev->bio_set);
603 		bio_chain(split, bio);
604 		generic_make_request(bio);
605 		bio = split;
606 	}
607 
608 	orig_sector = sector;
609 	zone = find_zone(mddev->private, &sector);
610 	switch (conf->layout) {
611 	case RAID0_ORIG_LAYOUT:
612 		tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
613 		break;
614 	case RAID0_ALT_MULTIZONE_LAYOUT:
615 		tmp_dev = map_sector(mddev, zone, sector, &sector);
616 		break;
617 	default:
618 		WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
619 		bio_io_error(bio);
620 		return true;
621 	}
622 
623 	if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
624 		bio_io_error(bio);
625 		return true;
626 	}
627 
628 	bio_set_dev(bio, tmp_dev->bdev);
629 	bio->bi_iter.bi_sector = sector + zone->dev_start +
630 		tmp_dev->data_offset;
631 
632 	if (mddev->gendisk)
633 		trace_block_bio_remap(bio->bi_disk->queue, bio,
634 				disk_devt(mddev->gendisk), bio_sector);
635 	mddev_check_writesame(mddev, bio);
636 	mddev_check_write_zeroes(mddev, bio);
637 	generic_make_request(bio);
638 	return true;
639 }
640 
641 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
642 {
643 	seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
644 	return;
645 }
646 
647 static void *raid0_takeover_raid45(struct mddev *mddev)
648 {
649 	struct md_rdev *rdev;
650 	struct r0conf *priv_conf;
651 
652 	if (mddev->degraded != 1) {
653 		pr_warn("md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
654 			mdname(mddev),
655 			mddev->degraded);
656 		return ERR_PTR(-EINVAL);
657 	}
658 
659 	rdev_for_each(rdev, mddev) {
660 		/* check slot number for a disk */
661 		if (rdev->raid_disk == mddev->raid_disks-1) {
662 			pr_warn("md/raid0:%s: raid5 must have missing parity disk!\n",
663 				mdname(mddev));
664 			return ERR_PTR(-EINVAL);
665 		}
666 		rdev->sectors = mddev->dev_sectors;
667 	}
668 
669 	/* Set new parameters */
670 	mddev->new_level = 0;
671 	mddev->new_layout = 0;
672 	mddev->new_chunk_sectors = mddev->chunk_sectors;
673 	mddev->raid_disks--;
674 	mddev->delta_disks = -1;
675 	/* make sure it will be not marked as dirty */
676 	mddev->recovery_cp = MaxSector;
677 	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
678 
679 	create_strip_zones(mddev, &priv_conf);
680 
681 	return priv_conf;
682 }
683 
684 static void *raid0_takeover_raid10(struct mddev *mddev)
685 {
686 	struct r0conf *priv_conf;
687 
688 	/* Check layout:
689 	 *  - far_copies must be 1
690 	 *  - near_copies must be 2
691 	 *  - disks number must be even
692 	 *  - all mirrors must be already degraded
693 	 */
694 	if (mddev->layout != ((1 << 8) + 2)) {
695 		pr_warn("md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
696 			mdname(mddev),
697 			mddev->layout);
698 		return ERR_PTR(-EINVAL);
699 	}
700 	if (mddev->raid_disks & 1) {
701 		pr_warn("md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
702 			mdname(mddev));
703 		return ERR_PTR(-EINVAL);
704 	}
705 	if (mddev->degraded != (mddev->raid_disks>>1)) {
706 		pr_warn("md/raid0:%s: All mirrors must be already degraded!\n",
707 			mdname(mddev));
708 		return ERR_PTR(-EINVAL);
709 	}
710 
711 	/* Set new parameters */
712 	mddev->new_level = 0;
713 	mddev->new_layout = 0;
714 	mddev->new_chunk_sectors = mddev->chunk_sectors;
715 	mddev->delta_disks = - mddev->raid_disks / 2;
716 	mddev->raid_disks += mddev->delta_disks;
717 	mddev->degraded = 0;
718 	/* make sure it will be not marked as dirty */
719 	mddev->recovery_cp = MaxSector;
720 	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
721 
722 	create_strip_zones(mddev, &priv_conf);
723 	return priv_conf;
724 }
725 
726 static void *raid0_takeover_raid1(struct mddev *mddev)
727 {
728 	struct r0conf *priv_conf;
729 	int chunksect;
730 
731 	/* Check layout:
732 	 *  - (N - 1) mirror drives must be already faulty
733 	 */
734 	if ((mddev->raid_disks - 1) != mddev->degraded) {
735 		pr_err("md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
736 		       mdname(mddev));
737 		return ERR_PTR(-EINVAL);
738 	}
739 
740 	/*
741 	 * a raid1 doesn't have the notion of chunk size, so
742 	 * figure out the largest suitable size we can use.
743 	 */
744 	chunksect = 64 * 2; /* 64K by default */
745 
746 	/* The array must be an exact multiple of chunksize */
747 	while (chunksect && (mddev->array_sectors & (chunksect - 1)))
748 		chunksect >>= 1;
749 
750 	if ((chunksect << 9) < PAGE_SIZE)
751 		/* array size does not allow a suitable chunk size */
752 		return ERR_PTR(-EINVAL);
753 
754 	/* Set new parameters */
755 	mddev->new_level = 0;
756 	mddev->new_layout = 0;
757 	mddev->new_chunk_sectors = chunksect;
758 	mddev->chunk_sectors = chunksect;
759 	mddev->delta_disks = 1 - mddev->raid_disks;
760 	mddev->raid_disks = 1;
761 	/* make sure it will be not marked as dirty */
762 	mddev->recovery_cp = MaxSector;
763 	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
764 
765 	create_strip_zones(mddev, &priv_conf);
766 	return priv_conf;
767 }
768 
769 static void *raid0_takeover(struct mddev *mddev)
770 {
771 	/* raid0 can take over:
772 	 *  raid4 - if all data disks are active.
773 	 *  raid5 - providing it is Raid4 layout and one disk is faulty
774 	 *  raid10 - assuming we have all necessary active disks
775 	 *  raid1 - with (N -1) mirror drives faulty
776 	 */
777 
778 	if (mddev->bitmap) {
779 		pr_warn("md/raid0: %s: cannot takeover array with bitmap\n",
780 			mdname(mddev));
781 		return ERR_PTR(-EBUSY);
782 	}
783 	if (mddev->level == 4)
784 		return raid0_takeover_raid45(mddev);
785 
786 	if (mddev->level == 5) {
787 		if (mddev->layout == ALGORITHM_PARITY_N)
788 			return raid0_takeover_raid45(mddev);
789 
790 		pr_warn("md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
791 			mdname(mddev), ALGORITHM_PARITY_N);
792 	}
793 
794 	if (mddev->level == 10)
795 		return raid0_takeover_raid10(mddev);
796 
797 	if (mddev->level == 1)
798 		return raid0_takeover_raid1(mddev);
799 
800 	pr_warn("Takeover from raid%i to raid0 not supported\n",
801 		mddev->level);
802 
803 	return ERR_PTR(-EINVAL);
804 }
805 
/*
 * raid0_quiesce - quiesce callback of the RAID0 personality
 * @mddev:   the array (unused)
 * @quiesce: requested state (unused)
 *
 * Intentionally does nothing.
 */
static void raid0_quiesce(struct mddev *mddev, int quiesce)
{
	/* nothing to do */
}
809 
810 static struct md_personality raid0_personality=
811 {
812 	.name		= "raid0",
813 	.level		= 0,
814 	.owner		= THIS_MODULE,
815 	.make_request	= raid0_make_request,
816 	.run		= raid0_run,
817 	.free		= raid0_free,
818 	.status		= raid0_status,
819 	.size		= raid0_size,
820 	.takeover	= raid0_takeover,
821 	.quiesce	= raid0_quiesce,
822 	.congested	= raid0_congested,
823 };
824 
825 static int __init raid0_init (void)
826 {
827 	return register_md_personality (&raid0_personality);
828 }
829 
830 static void raid0_exit (void)
831 {
832 	unregister_md_personality (&raid0_personality);
833 }
834 
/* Hook the init/exit functions into the module loader. */
module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
/* Alias names under which this module can be requested. */
MODULE_ALIAS("md-personality-2"); /* RAID0 */
MODULE_ALIAS("md-raid0");
MODULE_ALIAS("md-level-0");
842