/*
 * Copyright (C) 2015 Shaohua Li <shli@fb.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/raid/md_p.h>
#include <linux/crc32c.h>
#include <linux/random.h>
#include "md.h"
#include "raid5.h"

/*
 * metadata/data stored in disk with 4k size unit (a block) regardless
 * underneath hardware sector size. only works with PAGE_SIZE == 4096
 */
#define BLOCK_SECTORS (8)

/*
 * reclaim runs every 1/4 disk size or 10G reclaimable space. This can prevent
 * recovery scans a very long log
 */
#define RECLAIM_MAX_FREE_SPACE (10 * 1024 * 1024 * 2) /* sector */
#define RECLAIM_MAX_FREE_SPACE_SHIFT (2)

/*
 * Per-array state of the raid5 write-ahead log.  The log device is used as a
 * ring buffer: last_checkpoint is the tail (oldest data recovery must scan),
 * log_start is the head (where the next meta/data block is appended).
 * io_units migrate through the lists below in strict order:
 * running_ios -> io_end_ios -> flushing_ios -> flushed_ios -> stripe_end_ios.
 */
struct r5l_log {
	struct md_rdev *rdev;		/* the log device */

	u32 uuid_checksum;		/* crc32c seed derived from the array
					 * uuid; mixed into all on-disk
					 * checksums */

	sector_t device_size;		/* log device size, round to
					 * BLOCK_SECTORS */
	sector_t max_free_space;	/* reclaim run if free space is at
					 * this size */

	sector_t last_checkpoint;	/* log tail. where recovery scan
					 * starts from */
	u64 last_cp_seq;		/* log tail sequence */

	sector_t log_start;		/* log head. where new data appends */
	u64 seq;			/* log head sequence */

	struct mutex io_mutex;		/* serializes appends to current_io */
	struct r5l_io_unit *current_io;	/* current io_unit accepting new data */

	spinlock_t io_list_lock;	/* protects all io_unit lists below */
	struct list_head running_ios;	/* io_units which are still running,
					 * and have not yet been completely
					 * written to the log */
	struct list_head io_end_ios;	/* io_units which have been completely
					 * written to the log but not yet written
					 * to the RAID */
	struct list_head flushing_ios;	/* io_units which are waiting for log
					 * cache flush */
	struct list_head flushed_ios;	/* io_units which settle down in log disk */
	struct bio flush_bio;		/* embedded bio reused for log-device
					 * cache flushes */
	struct list_head stripe_end_ios;/* io_units which have been completely
					 * written to the RAID but have not yet
					 * been considered for updating super */

	struct kmem_cache *io_kc;	/* allocator for struct r5l_io_unit */

	struct md_thread *reclaim_thread;
	unsigned long reclaim_target;	/* number of space that need to be
					 * reclaimed.  if it's 0, reclaim spaces
					 * used by io_units which are in
					 * IO_UNIT_STRIPE_END state (eg, reclaim
					 * doesn't wait for specific io_unit
					 * switching to IO_UNIT_STRIPE_END
					 * state) */
	wait_queue_head_t iounit_wait;	/* woken when io_units reach
					 * IO_UNIT_STRIPE_END */

	struct list_head no_space_stripes; /* pending stripes, log has no space */
	spinlock_t no_space_stripes_lock;
};

/*
 * an IO range starts from a meta data block and end at the next meta data
 * block. The io unit's the meta data block tracks data/parity followed it.
io 91f6bed0efSShaohua Li * unit is written to log disk with normal write, as we always flush log disk 92f6bed0efSShaohua Li * first and then start move data to raid disks, there is no requirement to 93f6bed0efSShaohua Li * write io unit with FLUSH/FUA 94f6bed0efSShaohua Li */ 95f6bed0efSShaohua Li struct r5l_io_unit { 96f6bed0efSShaohua Li struct r5l_log *log; 97f6bed0efSShaohua Li 98f6bed0efSShaohua Li struct page *meta_page; /* store meta block */ 99f6bed0efSShaohua Li int meta_offset; /* current offset in meta_page */ 100f6bed0efSShaohua Li 101f6bed0efSShaohua Li struct bio_list bios; 102f6bed0efSShaohua Li atomic_t pending_io; /* pending bios not written to log yet */ 103f6bed0efSShaohua Li struct bio *current_bio;/* current_bio accepting new data */ 104f6bed0efSShaohua Li 105f6bed0efSShaohua Li atomic_t pending_stripe;/* how many stripes not flushed to raid */ 106f6bed0efSShaohua Li u64 seq; /* seq number of the metablock */ 107f6bed0efSShaohua Li sector_t log_start; /* where the io_unit starts */ 108f6bed0efSShaohua Li sector_t log_end; /* where the io_unit ends */ 109f6bed0efSShaohua Li struct list_head log_sibling; /* log->running_ios */ 110f6bed0efSShaohua Li struct list_head stripe_list; /* stripes added to the io_unit */ 111f6bed0efSShaohua Li 112f6bed0efSShaohua Li int state; 113f6bed0efSShaohua Li }; 114f6bed0efSShaohua Li 115f6bed0efSShaohua Li /* r5l_io_unit state */ 116f6bed0efSShaohua Li enum r5l_io_unit_state { 117f6bed0efSShaohua Li IO_UNIT_RUNNING = 0, /* accepting new IO */ 118f6bed0efSShaohua Li IO_UNIT_IO_START = 1, /* io_unit bio start writing to log, 119f6bed0efSShaohua Li * don't accepting new bio */ 120f6bed0efSShaohua Li IO_UNIT_IO_END = 2, /* io_unit bio finish writing to log */ 121a8c34f91SShaohua Li IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */ 122f6bed0efSShaohua Li }; 123f6bed0efSShaohua Li 124f6bed0efSShaohua Li static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc) 125f6bed0efSShaohua 
Li { 126f6bed0efSShaohua Li start += inc; 127f6bed0efSShaohua Li if (start >= log->device_size) 128f6bed0efSShaohua Li start = start - log->device_size; 129f6bed0efSShaohua Li return start; 130f6bed0efSShaohua Li } 131f6bed0efSShaohua Li 132f6bed0efSShaohua Li static sector_t r5l_ring_distance(struct r5l_log *log, sector_t start, 133f6bed0efSShaohua Li sector_t end) 134f6bed0efSShaohua Li { 135f6bed0efSShaohua Li if (end >= start) 136f6bed0efSShaohua Li return end - start; 137f6bed0efSShaohua Li else 138f6bed0efSShaohua Li return end + log->device_size - start; 139f6bed0efSShaohua Li } 140f6bed0efSShaohua Li 141f6bed0efSShaohua Li static bool r5l_has_free_space(struct r5l_log *log, sector_t size) 142f6bed0efSShaohua Li { 143f6bed0efSShaohua Li sector_t used_size; 144f6bed0efSShaohua Li 145f6bed0efSShaohua Li used_size = r5l_ring_distance(log, log->last_checkpoint, 146f6bed0efSShaohua Li log->log_start); 147f6bed0efSShaohua Li 148f6bed0efSShaohua Li return log->device_size > used_size + size; 149f6bed0efSShaohua Li } 150f6bed0efSShaohua Li 151f6bed0efSShaohua Li static struct r5l_io_unit *r5l_alloc_io_unit(struct r5l_log *log) 152f6bed0efSShaohua Li { 153f6bed0efSShaohua Li struct r5l_io_unit *io; 154f6bed0efSShaohua Li /* We can't handle memory allocate failure so far */ 155f6bed0efSShaohua Li gfp_t gfp = GFP_NOIO | __GFP_NOFAIL; 156f6bed0efSShaohua Li 157f6bed0efSShaohua Li io = kmem_cache_zalloc(log->io_kc, gfp); 158f6bed0efSShaohua Li io->log = log; 159f6bed0efSShaohua Li io->meta_page = alloc_page(gfp | __GFP_ZERO); 160f6bed0efSShaohua Li 161f6bed0efSShaohua Li bio_list_init(&io->bios); 162f6bed0efSShaohua Li INIT_LIST_HEAD(&io->log_sibling); 163f6bed0efSShaohua Li INIT_LIST_HEAD(&io->stripe_list); 164f6bed0efSShaohua Li io->state = IO_UNIT_RUNNING; 165f6bed0efSShaohua Li return io; 166f6bed0efSShaohua Li } 167f6bed0efSShaohua Li 168f6bed0efSShaohua Li static void r5l_free_io_unit(struct r5l_log *log, struct r5l_io_unit *io) 169f6bed0efSShaohua Li { 
170f6bed0efSShaohua Li __free_page(io->meta_page); 171f6bed0efSShaohua Li kmem_cache_free(log->io_kc, io); 172f6bed0efSShaohua Li } 173f6bed0efSShaohua Li 174f6bed0efSShaohua Li static void r5l_move_io_unit_list(struct list_head *from, struct list_head *to, 175f6bed0efSShaohua Li enum r5l_io_unit_state state) 176f6bed0efSShaohua Li { 177f6bed0efSShaohua Li struct r5l_io_unit *io; 178f6bed0efSShaohua Li 179f6bed0efSShaohua Li while (!list_empty(from)) { 180f6bed0efSShaohua Li io = list_first_entry(from, struct r5l_io_unit, log_sibling); 181f6bed0efSShaohua Li /* don't change list order */ 182f6bed0efSShaohua Li if (io->state >= state) 183f6bed0efSShaohua Li list_move_tail(&io->log_sibling, to); 184f6bed0efSShaohua Li else 185f6bed0efSShaohua Li break; 186f6bed0efSShaohua Li } 187f6bed0efSShaohua Li } 188f6bed0efSShaohua Li 1890576b1c6SShaohua Li /* 1900576b1c6SShaohua Li * We don't want too many io_units reside in stripe_end_ios list, which will 1910576b1c6SShaohua Li * waste a lot of memory. So we try to remove some. But we must keep at least 2 1920576b1c6SShaohua Li * io_units. 
 * The superblock must point to a valid meta, if it's the last meta,
 * recovery can scan less
 */
static void r5l_compress_stripe_end_list(struct r5l_log *log)
{
	struct r5l_io_unit *first, *last, *io;

	first = list_first_entry(&log->stripe_end_ios,
				 struct r5l_io_unit, log_sibling);
	last = list_last_entry(&log->stripe_end_ios,
			       struct r5l_io_unit, log_sibling);
	if (first == last)
		return;
	list_del(&first->log_sibling);
	list_del(&last->log_sibling);
	/* free all middle io_units, folding their log range into 'first' so
	 * the reclaimable span stays contiguous */
	while (!list_empty(&log->stripe_end_ios)) {
		io = list_first_entry(&log->stripe_end_ios,
				      struct r5l_io_unit, log_sibling);
		list_del(&io->log_sibling);
		first->log_end = io->log_end;
		r5l_free_io_unit(log, io);
	}
	list_add_tail(&first->log_sibling, &log->stripe_end_ios);
	list_add_tail(&last->log_sibling, &log->stripe_end_ios);
}

/* advance io_unit state; states only move forward (caller holds io_list_lock) */
static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
				    enum r5l_io_unit_state state)
{
	if (WARN_ON(io->state >= state))
		return;
	io->state = state;
}

/* XXX: totally ignores I/O errors */
static void r5l_log_endio(struct bio *bio)
{
	struct r5l_io_unit *io = bio->bi_private;
	struct r5l_log *log = io->log;
	unsigned long flags;

	bio_put(bio);

	/* only act once the last bio of this io_unit completes */
	if (!atomic_dec_and_test(&io->pending_io))
		return;

	spin_lock_irqsave(&log->io_list_lock, flags);
	__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
	r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
			      IO_UNIT_IO_END);
	spin_unlock_irqrestore(&log->io_list_lock, flags);

	md_wakeup_thread(log->rdev->mddev->thread);
}

/*
 * Finalize the current meta block (record its size and checksum), mark the
 * io_unit IO_UNIT_IO_START and submit all of its queued bios to the log.
 */
static void r5l_submit_current_io(struct r5l_log *log)
{
	struct r5l_io_unit *io = log->current_io;
	struct r5l_meta_block *block;
	struct bio *bio;
	unsigned long flags;
	u32 crc;

	if (!io)
		return;

	block = page_address(io->meta_page);
	block->meta_size = cpu_to_le32(io->meta_offset);
	/* checksum covers the whole meta page, seeded with the array uuid */
	crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE);
	block->checksum = cpu_to_le32(crc);

	log->current_io = NULL;
	spin_lock_irqsave(&log->io_list_lock, flags);
	__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
	spin_unlock_irqrestore(&log->io_list_lock, flags);

	while ((bio = bio_list_pop(&io->bios))) {
		/* all IO must start from rdev->data_offset */
		bio->bi_iter.bi_sector += log->rdev->data_offset;
		submit_bio(WRITE, bio);
	}
}

/*
 * Start a new io_unit at the current log head: initialize its meta block,
 * queue the first bio carrying the meta page, and put the io_unit on
 * running_ios.
 */
static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
{
	struct r5l_io_unit *io;
	struct r5l_meta_block *block;
	struct bio *bio;

	io = r5l_alloc_io_unit(log);

	block = page_address(io->meta_page);
	block->magic = cpu_to_le32(R5LOG_MAGIC);
	block->version = R5LOG_VERSION;
	block->seq = cpu_to_le64(log->seq);
	block->position = cpu_to_le64(log->log_start);

	io->log_start = log->log_start;
	io->meta_offset = sizeof(struct r5l_meta_block);
	io->seq = log->seq;

	bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, BIO_MAX_PAGES);
	io->current_bio = bio;
	bio->bi_rw = WRITE;
	bio->bi_bdev = log->rdev->bdev;
	bio->bi_iter.bi_sector = log->log_start;
	bio_add_page(bio, io->meta_page, PAGE_SIZE, 0);
	bio->bi_end_io = r5l_log_endio;
	bio->bi_private = io;

	bio_list_add(&io->bios, bio);
	atomic_inc(&io->pending_io);

	log->seq++;
	log->log_start = r5l_ring_add(log, log->log_start, BLOCK_SECTORS);
	io->log_end = log->log_start;
	/* current bio hit disk end */
	if (log->log_start == 0)
		io->current_bio = NULL;

	spin_lock_irq(&log->io_list_lock);
	list_add_tail(&io->log_sibling, &log->running_ios);
	spin_unlock_irq(&log->io_list_lock);

	return io;
}

/*
 * Ensure current_io has room for a payload of payload_size bytes in its meta
 * page; submit the current io_unit and start a new one if not.  Always
 * returns 0 (allocation cannot fail with __GFP_NOFAIL).
 */
static int r5l_get_meta(struct r5l_log *log, unsigned int payload_size)
{
	struct r5l_io_unit *io;

	io = log->current_io;
	if (io && io->meta_offset + payload_size > PAGE_SIZE)
		r5l_submit_current_io(log);
	io = log->current_io;
	if (io)
		return 0;

	log->current_io = r5l_new_meta(log);
	return 0;
}

/*
 * Append one payload descriptor (data or parity) to the current meta block.
 * checksum2 is only recorded when checksum2_valid (RAID6 Q parity).
 */
static void r5l_append_payload_meta(struct r5l_log *log, u16 type,
				    sector_t location,
				    u32 checksum1, u32 checksum2,
				    bool checksum2_valid)
{
	struct r5l_io_unit *io = log->current_io;
	struct r5l_payload_data_parity *payload;

	payload = page_address(io->meta_page) + io->meta_offset;
	payload->header.type = cpu_to_le16(type);
	payload->header.flags = cpu_to_le16(0);
	/* size is in sectors: one page per checksum slot */
	payload->size = cpu_to_le32((1 + !!checksum2_valid) <<
				    (PAGE_SHIFT - 9));
	payload->location = cpu_to_le64(location);
	payload->checksum[0] = cpu_to_le32(checksum1);
	if (checksum2_valid)
		payload->checksum[1] = cpu_to_le32(checksum2);

	io->meta_offset += sizeof(struct r5l_payload_data_parity) +
		sizeof(__le32) * (1 + !!checksum2_valid);
}

/*
 * Append one data/parity page to the current io_unit's bio chain, starting a
 * fresh bio when the current one is full or the log wrapped around.
 */
static void r5l_append_payload_page(struct r5l_log *log, struct page *page)
{
	struct r5l_io_unit *io = log->current_io;

alloc_bio:
	if (!io->current_bio) {
		struct bio *bio;

		bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, BIO_MAX_PAGES);
		bio->bi_rw = WRITE;
		bio->bi_bdev = log->rdev->bdev;
		bio->bi_iter.bi_sector = log->log_start;
		bio->bi_end_io = r5l_log_endio;
		bio->bi_private = io;
		bio_list_add(&io->bios, bio);
		atomic_inc(&io->pending_io);
		io->current_bio = bio;
	}
	if (!bio_add_page(io->current_bio, page, PAGE_SIZE, 0)) {
		/* bio is full: drop it and allocate a new one */
		io->current_bio = NULL;
		goto alloc_bio;
	}
	log->log_start = r5l_ring_add(log, log->log_start,
				      BLOCK_SECTORS);
	/* current bio hit disk end */
	if (log->log_start == 0)
		io->current_bio = NULL;

	io->log_end = log->log_start;
}

/*
 * Append a whole stripe (payload meta + data pages + parity page(s)) to the
 * log and attach the stripe to the io_unit.  Caller holds log->io_mutex and
 * has verified there is enough free log space.
 */
static void r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
			   int data_pages, int parity_pages)
{
	int i;
	int meta_size;
	struct r5l_io_unit *io;

	meta_size =
		((sizeof(struct r5l_payload_data_parity) + sizeof(__le32))
		 * data_pages) +
		sizeof(struct r5l_payload_data_parity) +
		sizeof(__le32) * parity_pages;

	r5l_get_meta(log, meta_size);
	io = log->current_io;

	for (i = 0; i < sh->disks; i++) {
		if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
			continue;
		/* parity disks are logged separately below */
		if (i == sh->pd_idx || i == sh->qd_idx)
			continue;
		r5l_append_payload_meta(log, R5LOG_PAYLOAD_DATA,
					raid5_compute_blocknr(sh, i, 0),
					sh->dev[i].log_checksum, 0, false);
		r5l_append_payload_page(log, sh->dev[i].page);
	}

	if (sh->qd_idx >= 0) {
		/* RAID6: log both P and Q parity */
		r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
					sh->sector, sh->dev[sh->pd_idx].log_checksum,
					sh->dev[sh->qd_idx].log_checksum, true);
		r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
		r5l_append_payload_page(log, sh->dev[sh->qd_idx].page);
	} else {
		r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
					sh->sector, sh->dev[sh->pd_idx].log_checksum,
					0, false);
		r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
	}

	list_add_tail(&sh->log_list, &io->stripe_list);
	atomic_inc(&io->pending_stripe);
	sh->log_io = io;
}

static void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
/*
 * running in raid5d, where reclaim could wait for
 * raid5d too (when it flushes
 * data from log to raid disks), so we shouldn't wait for reclaim here
 */
int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
{
	int write_disks = 0;
	int data_pages, parity_pages;
	int meta_size;
	int reserve;
	int i;

	if (!log)
		return -EAGAIN;
	/* Don't support stripe batch */
	if (sh->log_io || !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
	    test_bit(STRIPE_SYNCING, &sh->state)) {
		/* the stripe is written to log, we start writing it to raid */
		clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
		return -EAGAIN;
	}

	for (i = 0; i < sh->disks; i++) {
		void *addr;

		if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
			continue;
		write_disks++;
		/* checksum is already calculated in last run */
		if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
			continue;
		addr = kmap_atomic(sh->dev[i].page);
		sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
						    addr, PAGE_SIZE);
		kunmap_atomic(addr);
	}
	parity_pages = 1 + !!(sh->qd_idx >= 0);
	data_pages = write_disks - parity_pages;

	meta_size =
		((sizeof(struct r5l_payload_data_parity) + sizeof(__le32))
		 * data_pages) +
		sizeof(struct r5l_payload_data_parity) +
		sizeof(__le32) * parity_pages;
	/* Doesn't work with very big raid array */
	if (meta_size + sizeof(struct r5l_meta_block) > PAGE_SIZE)
		return -EINVAL;

	set_bit(STRIPE_LOG_TRAPPED, &sh->state);
	atomic_inc(&sh->count);

	mutex_lock(&log->io_mutex);
	/* meta + data */
	reserve = (1 + write_disks) << (PAGE_SHIFT - 9);
	if (r5l_has_free_space(log, reserve))
		r5l_log_stripe(log, sh, data_pages, parity_pages);
	else {
		/* no room: park the stripe and ask reclaim to make space */
		spin_lock(&log->no_space_stripes_lock);
		list_add_tail(&sh->log_list, &log->no_space_stripes);
		spin_unlock(&log->no_space_stripes_lock);

		r5l_wake_reclaim(log, reserve);
	}
	mutex_unlock(&log->io_mutex);

	return 0;
}

/* submit whatever is buffered in the current io_unit (called from raid5d) */
void r5l_write_stripe_run(struct r5l_log *log)
{
	if (!log)
		return;
	mutex_lock(&log->io_mutex);
	r5l_submit_current_io(log);
	mutex_unlock(&log->io_mutex);
}

/*
 * Intercept an incoming FLUSH request.  Returns 0 if fully handled here,
 * -EAGAIN if the (flush-stripped) bio should continue down the normal path,
 * -ENODEV if there is no log.
 */
int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio)
{
	if (!log)
		return -ENODEV;
	/*
	 * we flush log disk cache first, then write stripe data to raid disks.
	 * So if bio is finished, the log disk cache is flushed already. The
	 * recovery guarantees we can recovery the bio from log disk, so we
	 * don't need to flush again
	 */
	if (bio->bi_iter.bi_size == 0) {
		bio_endio(bio);
		return 0;
	}
	bio->bi_rw &= ~REQ_FLUSH;
	return -EAGAIN;
}

/* This will run after log space is reclaimed */
static void r5l_run_no_space_stripes(struct r5l_log *log)
{
	struct stripe_head *sh;

	spin_lock(&log->no_space_stripes_lock);
	while (!list_empty(&log->no_space_stripes)) {
		sh = list_first_entry(&log->no_space_stripes,
				      struct stripe_head, log_list);
		list_del_init(&sh->log_list);
		set_bit(STRIPE_HANDLE, &sh->state);
		raid5_release_stripe(sh);
	}
	spin_unlock(&log->no_space_stripes_lock);
}

/*
 * Called when the last stripe of an io_unit reached the raid disks: move the
 * io_unit to stripe_end_ios, kick reclaim if enough space is reclaimable,
 * and wake anyone waiting in r5l_kick_io_unit().
 */
static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
{
	struct r5l_log *log = io->log;
	struct r5l_io_unit *last;
	sector_t reclaimable_space;
	unsigned long flags;

	spin_lock_irqsave(&log->io_list_lock, flags);
	__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
	/* might move 0 entry */
	r5l_move_io_unit_list(&log->flushed_ios, &log->stripe_end_ios,
			      IO_UNIT_STRIPE_END);
	if (list_empty(&log->stripe_end_ios)) {
		spin_unlock_irqrestore(&log->io_list_lock, flags);
		return;
	}

	last = list_last_entry(&log->stripe_end_ios,
			       struct r5l_io_unit, log_sibling);
	reclaimable_space = r5l_ring_distance(log, log->last_checkpoint,
					      last->log_end);
	if (reclaimable_space >= log->max_free_space)
		r5l_wake_reclaim(log, 0);

	r5l_compress_stripe_end_list(log);
	spin_unlock_irqrestore(&log->io_list_lock, flags);
	wake_up(&log->iounit_wait);
}

/* stripe sh finished its raid-disk writes; detach it from its io_unit */
void r5l_stripe_write_finished(struct stripe_head *sh)
{
	struct r5l_io_unit *io;

	io = sh->log_io;
	sh->log_io = NULL;

	if (io && atomic_dec_and_test(&io->pending_stripe))
		__r5l_stripe_write_finished(io);
}

/*
 * Log-device cache flush completed: every stripe in flushing_ios is now
 * durable in the log, so release the stripes for raid-disk writes and move
 * the io_units to flushed_ios.
 */
static void r5l_log_flush_endio(struct bio *bio)
{
	struct r5l_log *log = container_of(bio, struct r5l_log,
		flush_bio);
	unsigned long flags;
	struct r5l_io_unit *io;
	struct stripe_head *sh;

	spin_lock_irqsave(&log->io_list_lock, flags);
	list_for_each_entry(io, &log->flushing_ios, log_sibling) {
		while (!list_empty(&io->stripe_list)) {
			sh = list_first_entry(&io->stripe_list,
					      struct stripe_head, log_list);
			list_del_init(&sh->log_list);
			set_bit(STRIPE_HANDLE, &sh->state);
			raid5_release_stripe(sh);
		}
	}
	list_splice_tail_init(&log->flushing_ios, &log->flushed_ios);
	spin_unlock_irqrestore(&log->io_list_lock, flags);
}

/*
 * Starting dispatch IO to raid.
 * io_unit(meta) consists of a log. There is one situation we want to avoid. A
 * broken meta in the middle of a log causes recovery can't find meta at the
 * head of log. If operations require meta at the head persistent in log, we
 * must make sure meta before it persistent in log too. A case is:
 *
 * stripe data/parity is in log, we start write stripe to raid disks. stripe
 * data/parity must be persistent in log before we do the write to raid disks.
 *
 * The solution is we strictly maintain io_unit list order. In this case, we
 * only write stripes of an io_unit to raid disks till the io_unit is the first
 * one whose data/parity is in log.
 */
void r5l_flush_stripe_to_raid(struct r5l_log *log)
{
	bool do_flush;
	if (!log)
		return;

	spin_lock_irq(&log->io_list_lock);
	/* flush bio is running */
	if (!list_empty(&log->flushing_ios)) {
		spin_unlock_irq(&log->io_list_lock);
		return;
	}
	list_splice_tail_init(&log->io_end_ios, &log->flushing_ios);
	do_flush = !list_empty(&log->flushing_ios);
	spin_unlock_irq(&log->io_list_lock);

	if (!do_flush)
		return;
	/* flush_bio is embedded in the log and reused; safe because only one
	 * flush is in flight at a time (guarded by the flushing_ios check) */
	bio_reset(&log->flush_bio);
	log->flush_bio.bi_bdev = log->rdev->bdev;
	log->flush_bio.bi_end_io = r5l_log_flush_endio;
	submit_bio(WRITE_FLUSH, &log->flush_bio);
}

/*
 * Nudge raid5d to push stripes forward, then sleep (dropping io_list_lock)
 * until some io_unit reaches stripe_end_ios.
 */
static void r5l_kick_io_unit(struct r5l_log *log)
{
	md_wakeup_thread(log->rdev->mddev->thread);
	wait_event_lock_irq(log->iounit_wait, !list_empty(&log->stripe_end_ios),
			    log->io_list_lock);
}

static void r5l_write_super(struct r5l_log *log, sector_t cp);
static void r5l_do_reclaim(struct r5l_log *log)
{
	struct r5l_io_unit *io, *last;
	LIST_HEAD(list);
	sector_t free = 0;
	sector_t reclaim_target = xchg(&log->reclaim_target, 0);

	spin_lock_irq(&log->io_list_lock);
	/*
	 * move proper io_unit to reclaim list. We should not change the order.
	 * reclaimable/unreclaimable io_unit can be mixed in the list, we
	 * shouldn't reuse space of an unreclaimable io_unit
	 */
	while (1) {
		struct list_head *target_list = NULL;

		while (!list_empty(&log->stripe_end_ios)) {
			io = list_first_entry(&log->stripe_end_ios,
					      struct r5l_io_unit, log_sibling);
			list_move_tail(&io->log_sibling, &list);
			free += r5l_ring_distance(log, io->log_start,
						  io->log_end);
		}

		if (free >= reclaim_target ||
		    (list_empty(&log->running_ios) &&
		     list_empty(&log->io_end_ios) &&
		     list_empty(&log->flushing_ios) &&
		     list_empty(&log->flushed_ios)))
			break;

		/* Below waiting mostly happens when we shutdown the raid */
		/* NOTE(review): target_list is assigned but never read in the
		 * code visible here — looks like dead code; confirm against
		 * the rest of the function before removing */
		if (!list_empty(&log->flushed_ios))
			target_list = &log->flushed_ios;
		else if (!list_empty(&log->flushing_ios))
			target_list = &log->flushing_ios;
		else if (!list_empty(&log->io_end_ios))
			target_list = &log->io_end_ios;
		else if (!list_empty(&log->running_ios))
			target_list = &log->running_ios;

		r5l_kick_io_unit(log);
	}
	spin_unlock_irq(&log->io_list_lock);

	if (list_empty(&list))
		return;

/* super always point to last valid meta */ 7010576b1c6SShaohua Li last = list_last_entry(&list, struct r5l_io_unit, log_sibling); 7020576b1c6SShaohua Li /* 7030576b1c6SShaohua Li * write_super will flush cache of each raid disk. We must write super 7040576b1c6SShaohua Li * here, because the log area might be reused soon and we don't want to 7050576b1c6SShaohua Li * confuse recovery 7060576b1c6SShaohua Li */ 7070576b1c6SShaohua Li r5l_write_super(log, last->log_start); 7080576b1c6SShaohua Li 7090576b1c6SShaohua Li mutex_lock(&log->io_mutex); 7100576b1c6SShaohua Li log->last_checkpoint = last->log_start; 7110576b1c6SShaohua Li log->last_cp_seq = last->seq; 7120576b1c6SShaohua Li mutex_unlock(&log->io_mutex); 7130576b1c6SShaohua Li r5l_run_no_space_stripes(log); 7140576b1c6SShaohua Li 7150576b1c6SShaohua Li while (!list_empty(&list)) { 7160576b1c6SShaohua Li io = list_first_entry(&list, struct r5l_io_unit, log_sibling); 7170576b1c6SShaohua Li list_del(&io->log_sibling); 7180576b1c6SShaohua Li r5l_free_io_unit(log, io); 7190576b1c6SShaohua Li } 7200576b1c6SShaohua Li } 7210576b1c6SShaohua Li 7220576b1c6SShaohua Li static void r5l_reclaim_thread(struct md_thread *thread) 7230576b1c6SShaohua Li { 7240576b1c6SShaohua Li struct mddev *mddev = thread->mddev; 7250576b1c6SShaohua Li struct r5conf *conf = mddev->private; 7260576b1c6SShaohua Li struct r5l_log *log = conf->log; 7270576b1c6SShaohua Li 7280576b1c6SShaohua Li if (!log) 7290576b1c6SShaohua Li return; 7300576b1c6SShaohua Li r5l_do_reclaim(log); 7310576b1c6SShaohua Li } 7320576b1c6SShaohua Li 733f6bed0efSShaohua Li static void r5l_wake_reclaim(struct r5l_log *log, sector_t space) 734f6bed0efSShaohua Li { 7350576b1c6SShaohua Li unsigned long target; 7360576b1c6SShaohua Li unsigned long new = (unsigned long)space; /* overflow in theory */ 7370576b1c6SShaohua Li 7380576b1c6SShaohua Li do { 7390576b1c6SShaohua Li target = log->reclaim_target; 7400576b1c6SShaohua Li if (new < target) 7410576b1c6SShaohua Li return; 
7420576b1c6SShaohua Li } while (cmpxchg(&log->reclaim_target, target, new) != target); 7430576b1c6SShaohua Li md_wakeup_thread(log->reclaim_thread); 744f6bed0efSShaohua Li } 745f6bed0efSShaohua Li 746355810d1SShaohua Li struct r5l_recovery_ctx { 747355810d1SShaohua Li struct page *meta_page; /* current meta */ 748355810d1SShaohua Li sector_t meta_total_blocks; /* total size of current meta and data */ 749355810d1SShaohua Li sector_t pos; /* recovery position */ 750355810d1SShaohua Li u64 seq; /* recovery position seq */ 751355810d1SShaohua Li }; 752355810d1SShaohua Li 753355810d1SShaohua Li static int r5l_read_meta_block(struct r5l_log *log, 754355810d1SShaohua Li struct r5l_recovery_ctx *ctx) 755355810d1SShaohua Li { 756355810d1SShaohua Li struct page *page = ctx->meta_page; 757355810d1SShaohua Li struct r5l_meta_block *mb; 758355810d1SShaohua Li u32 crc, stored_crc; 759355810d1SShaohua Li 760355810d1SShaohua Li if (!sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, READ, false)) 761355810d1SShaohua Li return -EIO; 762355810d1SShaohua Li 763355810d1SShaohua Li mb = page_address(page); 764355810d1SShaohua Li stored_crc = le32_to_cpu(mb->checksum); 765355810d1SShaohua Li mb->checksum = 0; 766355810d1SShaohua Li 767355810d1SShaohua Li if (le32_to_cpu(mb->magic) != R5LOG_MAGIC || 768355810d1SShaohua Li le64_to_cpu(mb->seq) != ctx->seq || 769355810d1SShaohua Li mb->version != R5LOG_VERSION || 770355810d1SShaohua Li le64_to_cpu(mb->position) != ctx->pos) 771355810d1SShaohua Li return -EINVAL; 772355810d1SShaohua Li 7735cb2fbd6SShaohua Li crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); 774355810d1SShaohua Li if (stored_crc != crc) 775355810d1SShaohua Li return -EINVAL; 776355810d1SShaohua Li 777355810d1SShaohua Li if (le32_to_cpu(mb->meta_size) > PAGE_SIZE) 778355810d1SShaohua Li return -EINVAL; 779355810d1SShaohua Li 780355810d1SShaohua Li ctx->meta_total_blocks = BLOCK_SECTORS; 781355810d1SShaohua Li 782355810d1SShaohua Li return 0; 783355810d1SShaohua Li } 
784355810d1SShaohua Li 785355810d1SShaohua Li static int r5l_recovery_flush_one_stripe(struct r5l_log *log, 786355810d1SShaohua Li struct r5l_recovery_ctx *ctx, 787355810d1SShaohua Li sector_t stripe_sect, 788355810d1SShaohua Li int *offset, sector_t *log_offset) 789355810d1SShaohua Li { 790355810d1SShaohua Li struct r5conf *conf = log->rdev->mddev->private; 791355810d1SShaohua Li struct stripe_head *sh; 792355810d1SShaohua Li struct r5l_payload_data_parity *payload; 793355810d1SShaohua Li int disk_index; 794355810d1SShaohua Li 795355810d1SShaohua Li sh = raid5_get_active_stripe(conf, stripe_sect, 0, 0, 0); 796355810d1SShaohua Li while (1) { 797355810d1SShaohua Li payload = page_address(ctx->meta_page) + *offset; 798355810d1SShaohua Li 799355810d1SShaohua Li if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) { 800355810d1SShaohua Li raid5_compute_sector(conf, 801355810d1SShaohua Li le64_to_cpu(payload->location), 0, 802355810d1SShaohua Li &disk_index, sh); 803355810d1SShaohua Li 804355810d1SShaohua Li sync_page_io(log->rdev, *log_offset, PAGE_SIZE, 805355810d1SShaohua Li sh->dev[disk_index].page, READ, false); 806355810d1SShaohua Li sh->dev[disk_index].log_checksum = 807355810d1SShaohua Li le32_to_cpu(payload->checksum[0]); 808355810d1SShaohua Li set_bit(R5_Wantwrite, &sh->dev[disk_index].flags); 809355810d1SShaohua Li ctx->meta_total_blocks += BLOCK_SECTORS; 810355810d1SShaohua Li } else { 811355810d1SShaohua Li disk_index = sh->pd_idx; 812355810d1SShaohua Li sync_page_io(log->rdev, *log_offset, PAGE_SIZE, 813355810d1SShaohua Li sh->dev[disk_index].page, READ, false); 814355810d1SShaohua Li sh->dev[disk_index].log_checksum = 815355810d1SShaohua Li le32_to_cpu(payload->checksum[0]); 816355810d1SShaohua Li set_bit(R5_Wantwrite, &sh->dev[disk_index].flags); 817355810d1SShaohua Li 818355810d1SShaohua Li if (sh->qd_idx >= 0) { 819355810d1SShaohua Li disk_index = sh->qd_idx; 820355810d1SShaohua Li sync_page_io(log->rdev, 821355810d1SShaohua Li 
r5l_ring_add(log, *log_offset, BLOCK_SECTORS), 822355810d1SShaohua Li PAGE_SIZE, sh->dev[disk_index].page, 823355810d1SShaohua Li READ, false); 824355810d1SShaohua Li sh->dev[disk_index].log_checksum = 825355810d1SShaohua Li le32_to_cpu(payload->checksum[1]); 826355810d1SShaohua Li set_bit(R5_Wantwrite, 827355810d1SShaohua Li &sh->dev[disk_index].flags); 828355810d1SShaohua Li } 829355810d1SShaohua Li ctx->meta_total_blocks += BLOCK_SECTORS * conf->max_degraded; 830355810d1SShaohua Li } 831355810d1SShaohua Li 832355810d1SShaohua Li *log_offset = r5l_ring_add(log, *log_offset, 833355810d1SShaohua Li le32_to_cpu(payload->size)); 834355810d1SShaohua Li *offset += sizeof(struct r5l_payload_data_parity) + 835355810d1SShaohua Li sizeof(__le32) * 836355810d1SShaohua Li (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9)); 837355810d1SShaohua Li if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY) 838355810d1SShaohua Li break; 839355810d1SShaohua Li } 840355810d1SShaohua Li 841355810d1SShaohua Li for (disk_index = 0; disk_index < sh->disks; disk_index++) { 842355810d1SShaohua Li void *addr; 843355810d1SShaohua Li u32 checksum; 844355810d1SShaohua Li 845355810d1SShaohua Li if (!test_bit(R5_Wantwrite, &sh->dev[disk_index].flags)) 846355810d1SShaohua Li continue; 847355810d1SShaohua Li addr = kmap_atomic(sh->dev[disk_index].page); 8485cb2fbd6SShaohua Li checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE); 849355810d1SShaohua Li kunmap_atomic(addr); 850355810d1SShaohua Li if (checksum != sh->dev[disk_index].log_checksum) 851355810d1SShaohua Li goto error; 852355810d1SShaohua Li } 853355810d1SShaohua Li 854355810d1SShaohua Li for (disk_index = 0; disk_index < sh->disks; disk_index++) { 855355810d1SShaohua Li struct md_rdev *rdev, *rrdev; 856355810d1SShaohua Li 857355810d1SShaohua Li if (!test_and_clear_bit(R5_Wantwrite, 858355810d1SShaohua Li &sh->dev[disk_index].flags)) 859355810d1SShaohua Li continue; 860355810d1SShaohua Li 861355810d1SShaohua Li /* in case device 
is broken */ 862355810d1SShaohua Li rdev = rcu_dereference(conf->disks[disk_index].rdev); 863355810d1SShaohua Li if (rdev) 864355810d1SShaohua Li sync_page_io(rdev, stripe_sect, PAGE_SIZE, 865355810d1SShaohua Li sh->dev[disk_index].page, WRITE, false); 866355810d1SShaohua Li rrdev = rcu_dereference(conf->disks[disk_index].replacement); 867355810d1SShaohua Li if (rrdev) 868355810d1SShaohua Li sync_page_io(rrdev, stripe_sect, PAGE_SIZE, 869355810d1SShaohua Li sh->dev[disk_index].page, WRITE, false); 870355810d1SShaohua Li } 871355810d1SShaohua Li raid5_release_stripe(sh); 872355810d1SShaohua Li return 0; 873355810d1SShaohua Li 874355810d1SShaohua Li error: 875355810d1SShaohua Li for (disk_index = 0; disk_index < sh->disks; disk_index++) 876355810d1SShaohua Li sh->dev[disk_index].flags = 0; 877355810d1SShaohua Li raid5_release_stripe(sh); 878355810d1SShaohua Li return -EINVAL; 879355810d1SShaohua Li } 880355810d1SShaohua Li 881355810d1SShaohua Li static int r5l_recovery_flush_one_meta(struct r5l_log *log, 882355810d1SShaohua Li struct r5l_recovery_ctx *ctx) 883355810d1SShaohua Li { 884355810d1SShaohua Li struct r5conf *conf = log->rdev->mddev->private; 885355810d1SShaohua Li struct r5l_payload_data_parity *payload; 886355810d1SShaohua Li struct r5l_meta_block *mb; 887355810d1SShaohua Li int offset; 888355810d1SShaohua Li sector_t log_offset; 889355810d1SShaohua Li sector_t stripe_sector; 890355810d1SShaohua Li 891355810d1SShaohua Li mb = page_address(ctx->meta_page); 892355810d1SShaohua Li offset = sizeof(struct r5l_meta_block); 893355810d1SShaohua Li log_offset = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS); 894355810d1SShaohua Li 895355810d1SShaohua Li while (offset < le32_to_cpu(mb->meta_size)) { 896355810d1SShaohua Li int dd; 897355810d1SShaohua Li 898355810d1SShaohua Li payload = (void *)mb + offset; 899355810d1SShaohua Li stripe_sector = raid5_compute_sector(conf, 900355810d1SShaohua Li le64_to_cpu(payload->location), 0, &dd, NULL); 901355810d1SShaohua Li if 
(r5l_recovery_flush_one_stripe(log, ctx, stripe_sector, 902355810d1SShaohua Li &offset, &log_offset)) 903355810d1SShaohua Li return -EINVAL; 904355810d1SShaohua Li } 905355810d1SShaohua Li return 0; 906355810d1SShaohua Li } 907355810d1SShaohua Li 908355810d1SShaohua Li /* copy data/parity from log to raid disks */ 909355810d1SShaohua Li static void r5l_recovery_flush_log(struct r5l_log *log, 910355810d1SShaohua Li struct r5l_recovery_ctx *ctx) 911355810d1SShaohua Li { 912355810d1SShaohua Li while (1) { 913355810d1SShaohua Li if (r5l_read_meta_block(log, ctx)) 914355810d1SShaohua Li return; 915355810d1SShaohua Li if (r5l_recovery_flush_one_meta(log, ctx)) 916355810d1SShaohua Li return; 917355810d1SShaohua Li ctx->seq++; 918355810d1SShaohua Li ctx->pos = r5l_ring_add(log, ctx->pos, ctx->meta_total_blocks); 919355810d1SShaohua Li } 920355810d1SShaohua Li } 921355810d1SShaohua Li 922355810d1SShaohua Li static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, 923355810d1SShaohua Li u64 seq) 924355810d1SShaohua Li { 925355810d1SShaohua Li struct page *page; 926355810d1SShaohua Li struct r5l_meta_block *mb; 927355810d1SShaohua Li u32 crc; 928355810d1SShaohua Li 929355810d1SShaohua Li page = alloc_page(GFP_KERNEL | __GFP_ZERO); 930355810d1SShaohua Li if (!page) 931355810d1SShaohua Li return -ENOMEM; 932355810d1SShaohua Li mb = page_address(page); 933355810d1SShaohua Li mb->magic = cpu_to_le32(R5LOG_MAGIC); 934355810d1SShaohua Li mb->version = R5LOG_VERSION; 935355810d1SShaohua Li mb->meta_size = cpu_to_le32(sizeof(struct r5l_meta_block)); 936355810d1SShaohua Li mb->seq = cpu_to_le64(seq); 937355810d1SShaohua Li mb->position = cpu_to_le64(pos); 9385cb2fbd6SShaohua Li crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); 939355810d1SShaohua Li mb->checksum = cpu_to_le32(crc); 940355810d1SShaohua Li 941355810d1SShaohua Li if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, WRITE_FUA, false)) { 942355810d1SShaohua Li __free_page(page); 943355810d1SShaohua Li 
return -EIO; 944355810d1SShaohua Li } 945355810d1SShaohua Li __free_page(page); 946355810d1SShaohua Li return 0; 947355810d1SShaohua Li } 948355810d1SShaohua Li 949f6bed0efSShaohua Li static int r5l_recovery_log(struct r5l_log *log) 950f6bed0efSShaohua Li { 951355810d1SShaohua Li struct r5l_recovery_ctx ctx; 952355810d1SShaohua Li 953355810d1SShaohua Li ctx.pos = log->last_checkpoint; 954355810d1SShaohua Li ctx.seq = log->last_cp_seq; 955355810d1SShaohua Li ctx.meta_page = alloc_page(GFP_KERNEL); 956355810d1SShaohua Li if (!ctx.meta_page) 957355810d1SShaohua Li return -ENOMEM; 958355810d1SShaohua Li 959355810d1SShaohua Li r5l_recovery_flush_log(log, &ctx); 960355810d1SShaohua Li __free_page(ctx.meta_page); 961355810d1SShaohua Li 962355810d1SShaohua Li /* 963355810d1SShaohua Li * we did a recovery. Now ctx.pos points to an invalid meta block. New 964355810d1SShaohua Li * log will start here. but we can't let superblock point to last valid 965355810d1SShaohua Li * meta block. The log might looks like: 966355810d1SShaohua Li * | meta 1| meta 2| meta 3| 967355810d1SShaohua Li * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If 968355810d1SShaohua Li * superblock points to meta 1, we write a new valid meta 2n. if crash 969355810d1SShaohua Li * happens again, new recovery will start from meta 1. Since meta 2n is 970355810d1SShaohua Li * valid now, recovery will think meta 3 is valid, which is wrong. 971355810d1SShaohua Li * The solution is we create a new meta in meta2 with its seq == meta 972355810d1SShaohua Li * 1's seq + 10 and let superblock points to meta2. 
The same recovery will 973355810d1SShaohua Li * not think meta 3 is a valid meta, because its seq doesn't match 974355810d1SShaohua Li */ 975355810d1SShaohua Li if (ctx.seq > log->last_cp_seq + 1) { 976355810d1SShaohua Li int ret; 977355810d1SShaohua Li 978355810d1SShaohua Li ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10); 979355810d1SShaohua Li if (ret) 980355810d1SShaohua Li return ret; 981355810d1SShaohua Li log->seq = ctx.seq + 11; 982355810d1SShaohua Li log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); 983355810d1SShaohua Li r5l_write_super(log, ctx.pos); 984355810d1SShaohua Li } else { 985355810d1SShaohua Li log->log_start = ctx.pos; 986355810d1SShaohua Li log->seq = ctx.seq; 987355810d1SShaohua Li } 988f6bed0efSShaohua Li return 0; 989f6bed0efSShaohua Li } 990f6bed0efSShaohua Li 991f6bed0efSShaohua Li static void r5l_write_super(struct r5l_log *log, sector_t cp) 992f6bed0efSShaohua Li { 993f6bed0efSShaohua Li struct mddev *mddev = log->rdev->mddev; 994f6bed0efSShaohua Li 995f6bed0efSShaohua Li log->rdev->journal_tail = cp; 996f6bed0efSShaohua Li set_bit(MD_CHANGE_DEVS, &mddev->flags); 997f6bed0efSShaohua Li } 998f6bed0efSShaohua Li 999f6bed0efSShaohua Li static int r5l_load_log(struct r5l_log *log) 1000f6bed0efSShaohua Li { 1001f6bed0efSShaohua Li struct md_rdev *rdev = log->rdev; 1002f6bed0efSShaohua Li struct page *page; 1003f6bed0efSShaohua Li struct r5l_meta_block *mb; 1004f6bed0efSShaohua Li sector_t cp = log->rdev->journal_tail; 1005f6bed0efSShaohua Li u32 stored_crc, expected_crc; 1006f6bed0efSShaohua Li bool create_super = false; 1007f6bed0efSShaohua Li int ret; 1008f6bed0efSShaohua Li 1009f6bed0efSShaohua Li /* Make sure it's valid */ 1010f6bed0efSShaohua Li if (cp >= rdev->sectors || round_down(cp, BLOCK_SECTORS) != cp) 1011f6bed0efSShaohua Li cp = 0; 1012f6bed0efSShaohua Li page = alloc_page(GFP_KERNEL); 1013f6bed0efSShaohua Li if (!page) 1014f6bed0efSShaohua Li return -ENOMEM; 1015f6bed0efSShaohua Li 
1016f6bed0efSShaohua Li if (!sync_page_io(rdev, cp, PAGE_SIZE, page, READ, false)) { 1017f6bed0efSShaohua Li ret = -EIO; 1018f6bed0efSShaohua Li goto ioerr; 1019f6bed0efSShaohua Li } 1020f6bed0efSShaohua Li mb = page_address(page); 1021f6bed0efSShaohua Li 1022f6bed0efSShaohua Li if (le32_to_cpu(mb->magic) != R5LOG_MAGIC || 1023f6bed0efSShaohua Li mb->version != R5LOG_VERSION) { 1024f6bed0efSShaohua Li create_super = true; 1025f6bed0efSShaohua Li goto create; 1026f6bed0efSShaohua Li } 1027f6bed0efSShaohua Li stored_crc = le32_to_cpu(mb->checksum); 1028f6bed0efSShaohua Li mb->checksum = 0; 10295cb2fbd6SShaohua Li expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE); 1030f6bed0efSShaohua Li if (stored_crc != expected_crc) { 1031f6bed0efSShaohua Li create_super = true; 1032f6bed0efSShaohua Li goto create; 1033f6bed0efSShaohua Li } 1034f6bed0efSShaohua Li if (le64_to_cpu(mb->position) != cp) { 1035f6bed0efSShaohua Li create_super = true; 1036f6bed0efSShaohua Li goto create; 1037f6bed0efSShaohua Li } 1038f6bed0efSShaohua Li create: 1039f6bed0efSShaohua Li if (create_super) { 1040f6bed0efSShaohua Li log->last_cp_seq = prandom_u32(); 1041f6bed0efSShaohua Li cp = 0; 1042f6bed0efSShaohua Li /* 1043f6bed0efSShaohua Li * Make sure super points to correct address. Log might have 1044f6bed0efSShaohua Li * data very soon. 
If super hasn't correct log tail address, 1045f6bed0efSShaohua Li * recovery can't find the log 1046f6bed0efSShaohua Li */ 1047f6bed0efSShaohua Li r5l_write_super(log, cp); 1048f6bed0efSShaohua Li } else 1049f6bed0efSShaohua Li log->last_cp_seq = le64_to_cpu(mb->seq); 1050f6bed0efSShaohua Li 1051f6bed0efSShaohua Li log->device_size = round_down(rdev->sectors, BLOCK_SECTORS); 10520576b1c6SShaohua Li log->max_free_space = log->device_size >> RECLAIM_MAX_FREE_SPACE_SHIFT; 10530576b1c6SShaohua Li if (log->max_free_space > RECLAIM_MAX_FREE_SPACE) 10540576b1c6SShaohua Li log->max_free_space = RECLAIM_MAX_FREE_SPACE; 1055f6bed0efSShaohua Li log->last_checkpoint = cp; 1056f6bed0efSShaohua Li 1057f6bed0efSShaohua Li __free_page(page); 1058f6bed0efSShaohua Li 1059f6bed0efSShaohua Li return r5l_recovery_log(log); 1060f6bed0efSShaohua Li ioerr: 1061f6bed0efSShaohua Li __free_page(page); 1062f6bed0efSShaohua Li return ret; 1063f6bed0efSShaohua Li } 1064f6bed0efSShaohua Li 1065f6bed0efSShaohua Li int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) 1066f6bed0efSShaohua Li { 1067f6bed0efSShaohua Li struct r5l_log *log; 1068f6bed0efSShaohua Li 1069f6bed0efSShaohua Li if (PAGE_SIZE != 4096) 1070f6bed0efSShaohua Li return -EINVAL; 1071f6bed0efSShaohua Li log = kzalloc(sizeof(*log), GFP_KERNEL); 1072f6bed0efSShaohua Li if (!log) 1073f6bed0efSShaohua Li return -ENOMEM; 1074f6bed0efSShaohua Li log->rdev = rdev; 1075f6bed0efSShaohua Li 10765cb2fbd6SShaohua Li log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, 1077f6bed0efSShaohua Li sizeof(rdev->mddev->uuid)); 1078f6bed0efSShaohua Li 1079f6bed0efSShaohua Li mutex_init(&log->io_mutex); 1080f6bed0efSShaohua Li 1081f6bed0efSShaohua Li spin_lock_init(&log->io_list_lock); 1082f6bed0efSShaohua Li INIT_LIST_HEAD(&log->running_ios); 10830576b1c6SShaohua Li INIT_LIST_HEAD(&log->io_end_ios); 10840576b1c6SShaohua Li INIT_LIST_HEAD(&log->stripe_end_ios); 1085a8c34f91SShaohua Li INIT_LIST_HEAD(&log->flushing_ios); 1086a8c34f91SShaohua Li 
INIT_LIST_HEAD(&log->flushed_ios); 1087a8c34f91SShaohua Li bio_init(&log->flush_bio); 1088f6bed0efSShaohua Li 1089f6bed0efSShaohua Li log->io_kc = KMEM_CACHE(r5l_io_unit, 0); 1090f6bed0efSShaohua Li if (!log->io_kc) 1091f6bed0efSShaohua Li goto io_kc; 1092f6bed0efSShaohua Li 10930576b1c6SShaohua Li log->reclaim_thread = md_register_thread(r5l_reclaim_thread, 10940576b1c6SShaohua Li log->rdev->mddev, "reclaim"); 10950576b1c6SShaohua Li if (!log->reclaim_thread) 10960576b1c6SShaohua Li goto reclaim_thread; 10970fd22b45SShaohua Li init_waitqueue_head(&log->iounit_wait); 10980576b1c6SShaohua Li 1099f6bed0efSShaohua Li INIT_LIST_HEAD(&log->no_space_stripes); 1100f6bed0efSShaohua Li spin_lock_init(&log->no_space_stripes_lock); 1101f6bed0efSShaohua Li 1102f6bed0efSShaohua Li if (r5l_load_log(log)) 1103f6bed0efSShaohua Li goto error; 1104f6bed0efSShaohua Li 1105f6bed0efSShaohua Li conf->log = log; 1106f6bed0efSShaohua Li return 0; 1107f6bed0efSShaohua Li error: 11080576b1c6SShaohua Li md_unregister_thread(&log->reclaim_thread); 11090576b1c6SShaohua Li reclaim_thread: 1110f6bed0efSShaohua Li kmem_cache_destroy(log->io_kc); 1111f6bed0efSShaohua Li io_kc: 1112f6bed0efSShaohua Li kfree(log); 1113f6bed0efSShaohua Li return -EINVAL; 1114f6bed0efSShaohua Li } 1115f6bed0efSShaohua Li 1116f6bed0efSShaohua Li void r5l_exit_log(struct r5l_log *log) 1117f6bed0efSShaohua Li { 11180576b1c6SShaohua Li /* 11190576b1c6SShaohua Li * at this point all stripes are finished, so io_unit is at least in 11200576b1c6SShaohua Li * STRIPE_END state 11210576b1c6SShaohua Li */ 11220576b1c6SShaohua Li r5l_wake_reclaim(log, -1L); 11230576b1c6SShaohua Li md_unregister_thread(&log->reclaim_thread); 11240576b1c6SShaohua Li r5l_do_reclaim(log); 11250576b1c6SShaohua Li /* 11260576b1c6SShaohua Li * force a super update, r5l_do_reclaim might updated the super. 
11270576b1c6SShaohua Li * mddev->thread is already stopped 11280576b1c6SShaohua Li */ 11290576b1c6SShaohua Li md_update_sb(log->rdev->mddev, 1); 11300576b1c6SShaohua Li 1131f6bed0efSShaohua Li kmem_cache_destroy(log->io_kc); 1132f6bed0efSShaohua Li kfree(log); 1133f6bed0efSShaohua Li } 1134