--- raid5.c (7471fb77ce4dc4cb81291189947fcdf621a97987)
+++ raid5.c (845b9e229fe0716ab6b4d94b4364c99069667b59)
 /*
  * raid5.c : Multiple Devices driver for Linux
  * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
  * Copyright (C) 1999, 2000 Ingo Molnar
  * Copyright (C) 2002, 2003 H. Peter Anvin
  *
  * RAID-4/5/6 management functions.
  * Thanks to Penguin Computing for making the RAID-6 development possible

--- 457 unchanged lines hidden ---

 	for (i = 0; i < num ; i++) {
 		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
 		p = sh->dev[i].page;
 		if (!p)
 			continue;
 		sh->dev[i].page = NULL;
 		put_page(p);
 	}
-
-	if (sh->ppl_page) {
-		put_page(sh->ppl_page);
-		sh->ppl_page = NULL;
-	}
 }
 
 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 {
 	int i;
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num; i++) {
 		struct page *page;
 
 		if (!(page = alloc_page(gfp))) {
 			return 1;
 		}
 		sh->dev[i].page = page;
 		sh->dev[i].orig_page = page;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf)) {
-		sh->ppl_page = alloc_page(gfp);
-		if (!sh->ppl_page)
-			return 1;
-	}
-
 	return 0;
 }
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous);
 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
 			   struct stripe_head *sh);
 
 static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
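
Note: after this hunk, grow_buffers()/shrink_buffers() deal only with the per-device data
pages; the optional PPL buffer now lives and dies with the stripe_head itself, in
alloc_stripe()/free_stripe() below. A minimal userspace sketch of the resulting ownership
model (illustrative only; the names, page size and 4-page array are made up, this is not
kernel code):

	#include <stdlib.h>

	struct stripe {
		void *ppl_page;		/* optional, only when PPL is enabled */
		void *page[4];		/* per-device data pages, grown/shrunk separately */
	};

	/* mirrors free_stripe(): the destructor, not shrink_buffers(), frees ppl_page */
	static void stripe_free(struct stripe *sh)
	{
		free(sh->ppl_page);
		free(sh);
	}

	/* mirrors alloc_stripe(): the constructor allocates ppl_page when enabled */
	static struct stripe *stripe_alloc(int ppl_enabled)
	{
		struct stripe *sh = calloc(1, sizeof(*sh));

		if (!sh)
			return NULL;
		if (ppl_enabled) {
			sh->ppl_page = malloc(4096);
			if (!sh->ppl_page) {
				stripe_free(sh);	/* ppl_page is NULL here, free(NULL) is safe */
				return NULL;
			}
		}
		return sh;
	}

	int main(void)
	{
		struct stripe *sh = stripe_alloc(1);	/* as if PPL were enabled */

		if (sh)
			stripe_free(sh);
		return 0;
	}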

--- 1617 unchanged lines hidden ---

 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 		if (test_and_clear_bit(R5_Overlap, &dev->flags))
 			wake_up(&sh->raid_conf->wait_for_overlap);
 	}
 	put_cpu();
 }
 
+static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
+{
+	if (sh->ppl_page)
+		__free_page(sh->ppl_page);
+	kmem_cache_free(sc, sh);
+}
+
 static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
-	int disks)
+	int disks, struct r5conf *conf)
 {
 	struct stripe_head *sh;
 	int i;
 
 	sh = kmem_cache_zalloc(sc, gfp);
 	if (sh) {
 		spin_lock_init(&sh->stripe_lock);
 		spin_lock_init(&sh->batch_lock);
 		INIT_LIST_HEAD(&sh->batch_list);
 		INIT_LIST_HEAD(&sh->lru);
 		INIT_LIST_HEAD(&sh->r5c);
 		INIT_LIST_HEAD(&sh->log_list);
 		atomic_set(&sh->count, 1);
+		sh->raid_conf = conf;
 		sh->log_start = MaxSector;
 		for (i = 0; i < disks; i++) {
 			struct r5dev *dev = &sh->dev[i];
 
 			bio_init(&dev->req, &dev->vec, 1);
 			bio_init(&dev->rreq, &dev->rvec, 1);
 		}
+
+		if (raid5_has_ppl(conf)) {
+			sh->ppl_page = alloc_page(gfp);
+			if (!sh->ppl_page) {
+				free_stripe(sc, sh);
+				sh = NULL;
+			}
+		}
 	}
 	return sh;
 }
 static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
 	struct stripe_head *sh;
 
-	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
+	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
 	if (!sh)
 		return 0;
 
-	sh->raid_conf = conf;
-
 	if (grow_buffers(sh, gfp)) {
 		shrink_buffers(sh);
-		kmem_cache_free(conf->slab_cache, sh);
+		free_stripe(conf->slab_cache, sh);
 		return 0;
 	}
 	sh->hash_lock_index =
 		conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
 	/* we just created an active stripe so... */
 	atomic_inc(&conf->active_stripes);
 
 	raid5_release_stripe(sh);
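
Note: every site that used to call kmem_cache_free() on a stripe_head now goes through the
new free_stripe() helper, so the optional ppl_page cannot leak on any free path; the same
one-line substitution recurs in resize_stripes() and drop_one_stripe() below. alloc_stripe()
in turn takes the r5conf so it can set sh->raid_conf itself and decide from
raid5_has_ppl(conf) whether to allocate ppl_page. Condensed from the hunks above, the
caller-side pattern is:

	/* before */
	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
	sh->raid_conf = conf;
	/* ... */
	kmem_cache_free(conf->slab_cache, sh);

	/* after */
	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
	/* ... */
	free_stripe(conf->slab_cache, sh);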

--- 128 unchanged lines hidden ---

 	struct stripe_head *osh, *nsh;
 	LIST_HEAD(newstripes);
 	struct disk_info *ndisks;
 	int err;
 	struct kmem_cache *sc;
 	int i;
 	int hash, cnt;
 
-	if (newsize <= conf->pool_size)
-		return 0; /* never bother to shrink */
-
 	err = md_allow_write(conf->mddev);
 	if (err)
 		return err;
 
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
 			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
 			       0, 0, NULL);
 	if (!sc)
 		return -ENOMEM;
 
 	/* Need to ensure auto-resizing doesn't interfere */
 	mutex_lock(&conf->cache_size_mutex);
 
 	for (i = conf->max_nr_stripes; i; i--) {
-		nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
+		nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
 		if (!nsh)
 			break;
 
-		nsh->raid_conf = conf;
 		list_add(&nsh->lru, &newstripes);
 	}
 	if (i) {
 		/* didn't get enough, give up */
 		while (!list_empty(&newstripes)) {
 			nsh = list_entry(newstripes.next, struct stripe_head, lru);
 			list_del(&nsh->lru);
-			kmem_cache_free(sc, nsh);
+			free_stripe(sc, nsh);
 		}
 		kmem_cache_destroy(sc);
 		mutex_unlock(&conf->cache_size_mutex);
 		return -ENOMEM;
 	}
 	/* Step 2 - Must use GFP_NOIO now.
 	 * OK, we have enough stripes, start collecting inactive
 	 * stripes and copying them over

--- 9 unchanged lines hidden ---

 		osh = get_free_stripe(conf, hash);
 		unlock_device_hash_lock(conf, hash);
 
 		for(i=0; i<conf->pool_size; i++) {
 			nsh->dev[i].page = osh->dev[i].page;
 			nsh->dev[i].orig_page = osh->dev[i].page;
 		}
 		nsh->hash_lock_index = hash;
-		kmem_cache_free(conf->slab_cache, osh);
+		free_stripe(conf->slab_cache, osh);
 		cnt++;
 		if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
 		    !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
 			hash++;
 			cnt = 0;
 		}
 	}
 	kmem_cache_destroy(conf->slab_cache);
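
Note: the "never bother to shrink" early return was dropped from resize_stripes() (it
reappears in check_reshape() below, the one pre-existing caller that wanted it). An
equal-size call is now meaningful: it rebuilds every stripe_head through the new
alloc_stripe()/free_stripe() pair, so each stripe gains or loses its ppl_page according to
the current raid5_has_ppl() state. Sketch of the rebuild call as made by
raid5_change_consistency_policy() in the last hunk:

	/* newsize == pool_size: no geometry change, just reallocate every
	 * stripe_head so that ppl_page matches the new PPL setting */
	err = resize_stripes(conf, conf->pool_size);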

--- 60 unchanged lines hidden ---

 
 	spin_lock_irq(conf->hash_locks + hash);
 	sh = get_free_stripe(conf, hash);
 	spin_unlock_irq(conf->hash_locks + hash);
 	if (!sh)
 		return 0;
 	BUG_ON(atomic_read(&sh->count));
 	shrink_buffers(sh);
-	kmem_cache_free(conf->slab_cache, sh);
+	free_stripe(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
 	conf->max_nr_stripes--;
 	return 1;
 }
 
 static void shrink_stripes(struct r5conf *conf)
 {
 	while (conf->max_nr_stripes &&

--- 706 unchanged lines hidden ---

 		int qd_idx = sh->qd_idx;
 		struct r5dev *dev = &sh->dev[qd_idx];
 
 		set_bit(R5_LOCKED, &dev->flags);
 		clear_bit(R5_UPTODATE, &dev->flags);
 		s->locked++;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf) &&
+	if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
 	    test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
 	    !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
 	    test_bit(R5_Insync, &sh->dev[pd_idx].flags))
 		set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request);
 
 	pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
 		__func__, (unsigned long long)sh->sector,
 		s->locked, s->ops_request);
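
Note: between enabling PPL and the stripe-cache rebuild completing, writes can still be
handled with stripes that were allocated without a ppl_page. The added sh->ppl_page test
keeps the partial-parity operation off such stripes; condensed from the condition above
(other clauses elided):

	if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page && /* ... */)
		set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request);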

--- 4240 unchanged lines hidden ---

 					mddev->queue);
 		else
 			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
 					mddev->queue);
 
 		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
 	}
 
-	if (log_init(conf, journal_dev))
+	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
 		goto abort;
 
 	return 0;
 abort:
 	md_unregister_thread(&mddev->thread);
 	print_raid5_conf(conf);
 	free_conf(conf);
 	mddev->private = NULL;

--- 192 unchanged lines hidden ---

 		if (conf->log)
 			return -EBUSY;
 
 		rdev->raid_disk = 0;
 		/*
 		 * The array is in readonly mode if journal is missing, so no
 		 * write requests running. We should be safe
 		 */
-		log_init(conf, rdev);
+		log_init(conf, rdev, false);
 		return 0;
 	}
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
 
 	if (rdev->saved_raid_disk < 0 && has_failed(conf))
 		/* no point adding a device */
 		return -EINVAL;
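
Note: log_init() gains a third argument selecting PPL rather than write-back journal
initialization. Collected from the hunks in this diff, its three call sites become:

	log_init(conf, journal_dev, raid5_has_ppl(conf));	/* array start-up (raid5_run) */
	log_init(conf, rdev, false);				/* hot-adding a journal device */
	log_init(conf, NULL, true);				/* enabling ppl via consistency_policy */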

--- 133 unchanged lines hidden ---

 	    mddev->delta_disks > 0)
 		if (resize_chunks(conf,
 				  conf->previous_raid_disks
 				  + max(0, mddev->delta_disks),
 				  max(mddev->new_chunk_sectors,
 				      mddev->chunk_sectors)
 				  ) < 0)
 			return -ENOMEM;
+
+	if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
+		return 0; /* never bother to shrink */
 	return resize_stripes(conf, (conf->previous_raid_disks
 				     + mddev->delta_disks));
 }
 
 static int raid5_start_reshape(struct mddev *mddev)
 {
 	struct r5conf *conf = mddev->private;
 	struct md_rdev *rdev;
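
Note: the shrink check moved into check_reshape() because a reshape never needs a smaller
stripe pool, while other callers may now legitimately pass newsize == pool_size; keeping the
check out of resize_stripes() leaves the equal-size rebuild available to
raid5_change_consistency_policy(). The new tail of check_reshape(), condensed:

	if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
		return 0; /* never bother to shrink */
	return resize_stripes(conf, (conf->previous_raid_disks
				     + mddev->delta_disks));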

--- 474 unchanged lines hidden ---

 	}
 	mddev->new_level = 6;
 	mddev->new_layout = new_layout;
 	mddev->delta_disks = 1;
 	mddev->raid_disks += 1;
 	return setup_conf(mddev);
 }
 
-static void raid5_reset_stripe_cache(struct mddev *mddev)
-{
-	struct r5conf *conf = mddev->private;
-
-	mutex_lock(&conf->cache_size_mutex);
-	while (conf->max_nr_stripes &&
-	       drop_one_stripe(conf))
-		;
-	while (conf->min_nr_stripes > conf->max_nr_stripes &&
-	       grow_one_stripe(conf, GFP_KERNEL))
-		;
-	mutex_unlock(&conf->cache_size_mutex);
-}
-
 static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 {
 	struct r5conf *conf;
 	int err;
 
 	err = mddev_lock(mddev);
 	if (err)
 		return err;
 	conf = mddev->private;
 	if (!conf) {
 		mddev_unlock(mddev);
 		return -ENODEV;
 	}
 
-	if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) {
+	if (strncmp(buf, "ppl", 3) == 0) {
 		/* ppl only works with RAID 5 */
-		if (conf->level == 5) {
-			mddev_suspend(mddev);
-			set_bit(MD_HAS_PPL, &mddev->flags);
-			err = log_init(conf, NULL);
-			if (!err)
-				raid5_reset_stripe_cache(mddev);
-			mddev_resume(mddev);
+		if (!raid5_has_ppl(conf) && conf->level == 5) {
+			err = log_init(conf, NULL, true);
+			if (!err) {
+				err = resize_stripes(conf, conf->pool_size);
+				if (err)
+					log_exit(conf);
+			}
 		} else
 			err = -EINVAL;
 	} else if (strncmp(buf, "resync", 6) == 0) {
 		if (raid5_has_ppl(conf)) {
 			mddev_suspend(mddev);
 			log_exit(conf);
-			raid5_reset_stripe_cache(mddev);
 			mddev_resume(mddev);
+			err = resize_stripes(conf, conf->pool_size);
 		} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
 			   r5l_log_disk_error(conf)) {
 			bool journal_dev_exists = false;
 			struct md_rdev *rdev;
 
 			rdev_for_each(rdev, mddev)
 				if (test_bit(Journal, &rdev->flags)) {
 					journal_dev_exists = true;

--- 145 unchanged lines hidden ---
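
Note: raid5_reset_stripe_cache(), which dropped and regrew the entire stripe cache, is gone;
switching the consistency policy now reallocates the stripes in place through the equal-size
resize_stripes() call, and a failed rebuild while enabling PPL is rolled back with
log_exit(). Condensed flow of the two branches above (the bracketed conditions paraphrase
the strncmp() tests; not literal code):

	if (/* "ppl" requested, RAID5 only, PPL not already enabled */) {
		err = log_init(conf, NULL, true);
		if (!err) {
			err = resize_stripes(conf, conf->pool_size);
			if (err)
				log_exit(conf);		/* roll back */
		}
	} else if (/* "resync" requested while PPL is enabled */) {
		mddev_suspend(mddev);
		log_exit(conf);
		mddev_resume(mddev);
		err = resize_stripes(conf, conf->pool_size);
	}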