raid5-cache.c (5c88f403a5d2bd75911c6faaacc9bea97ac7d121) raid5-cache.c (3c6edc66085e1d895a698c572bbfaf4d57fdb771)
1/*
2 * Copyright (C) 2015 Shaohua Li <shli@fb.com>
3 * Copyright (C) 2016 Song Liu <songliubraving@fb.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *

--- 1990 unchanged lines hidden (view full) ---

1999 * log will start here. but we can't let superblock point to last valid
2000 * meta block. The log might looks like:
2001 * | meta 1| meta 2| meta 3|
2002 * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If
2003 * superblock points to meta 1, we write a new valid meta 2n. if crash
2004 * happens again, new recovery will start from meta 1. Since meta 2n is
2005 * valid now, recovery will think meta 3 is valid, which is wrong.
2006 * The solution is we create a new meta in meta2 with its seq == meta
1/*
2 * Copyright (C) 2015 Shaohua Li <shli@fb.com>
3 * Copyright (C) 2016 Song Liu <songliubraving@fb.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *

--- 1990 unchanged lines hidden (view full) ---

1999 * log will start here. but we can't let superblock point to last valid
2000 * meta block. The log might look like:
2001 * | meta 1| meta 2| meta 3|
2002 * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If
2003 * superblock points to meta 1, we write a new valid meta 2n. if crash
2004 * happens again, new recovery will start from meta 1. Since meta 2n is
2005 * valid now, recovery will think meta 3 is valid, which is wrong.
2006 * The solution is we create a new meta in meta2 with its seq == meta
2007 * 1's seq + 10 and let superblock points to meta2. The same recovery will
2008 * not think meta 3 is a valid meta, because its seq doesn't match
2007 * 1's seq + 10000 and let the superblock point to meta2. The same recovery
2008 * will not think meta 3 is a valid meta, because its seq doesn't match
2009 */
2010
2011/*
2012 * Before recovery, the log looks like the following
2013 *
2014 * ---------------------------------------------
2015 * | valid log | invalid log |
2016 * ---------------------------------------------

--- 13 unchanged lines hidden (view full) ---

2030 * From this point, we need to increase seq number by 10 to avoid
2031 * confusing next recovery.
2032 *
2033 * ---------------------------------------------
2034 * | valid log | invalid log |
2035 * ---------------------------------------------
2036 * ^ ^
2037 * |- log->last_checkpoint |- ctx->pos+1
2009 */
2010
2011/*
2012 * Before recovery, the log looks like the following
2013 *
2014 * ---------------------------------------------
2015 * | valid log | invalid log |
2016 * ---------------------------------------------

--- 13 unchanged lines hidden (view full) ---

2030 * From this point, we need to increase seq number by 10000 to avoid
2031 * confusing next recovery.
2032 *
2033 * ---------------------------------------------
2034 * | valid log | invalid log |
2035 * ---------------------------------------------
2036 * ^ ^
2037 * |- log->last_checkpoint |- ctx->pos+1
2038 * |- log->last_cp_seq |- ctx->seq+11
2038 * |- log->last_cp_seq |- ctx->seq+10001
2039 *
2040 * However, it is not safe to start the state machine yet, because data only
2041 * parities are not yet secured in RAID. To save these data only parities, we
2042 * rewrite them from seq+11.
2043 *
2044 * -----------------------------------------------------------------
2045 * | valid log | data only stripes | invalid log |
2046 * -----------------------------------------------------------------
2047 * ^ ^
2048 * |- log->last_checkpoint |- ctx->pos+n
2039 *
2040 * However, it is not safe to start the state machine yet, because data only
2041 * parities are not yet secured in RAID. To save these data only parities, we
2042 * rewrite them from seq+10001.
2043 *
2044 * -----------------------------------------------------------------
2045 * | valid log | data only stripes | invalid log |
2046 * -----------------------------------------------------------------
2047 * ^ ^
2048 * |- log->last_checkpoint |- ctx->pos+n
2049 * |- log->last_cp_seq |- ctx->seq+10+n
2049 * |- log->last_cp_seq |- ctx->seq+10000+n
2050 *
2051 * If failure happens again during this process, the recovery can safe start
2052 * again from log->last_checkpoint.
2053 *
2054 * Once data only stripes are rewritten to journal, we move log_tail
2055 *
2056 * -----------------------------------------------------------------
2057 * | old log | data only stripes | invalid log |
2058 * -----------------------------------------------------------------
2059 * ^ ^
2060 * |- log->last_checkpoint |- ctx->pos+n
2050 *
2051 * If failure happens again during this process, the recovery can safely start
2052 * again from log->last_checkpoint.
2053 *
2054 * Once data only stripes are rewritten to journal, we move log_tail
2055 *
2056 * -----------------------------------------------------------------
2057 * | old log | data only stripes | invalid log |
2058 * -----------------------------------------------------------------
2059 * ^ ^
2060 * |- log->last_checkpoint |- ctx->pos+n
2061 * |- log->last_cp_seq |- ctx->seq+10+n
2061 * |- log->last_cp_seq |- ctx->seq+10000+n
2062 *
2063 * Then we can safely start the state machine. If failure happens from this
2064 * point on, the recovery will start from new log->last_checkpoint.
2065 */
2066static int
2067r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2068 struct r5l_recovery_ctx *ctx)
2069{

--- 82 unchanged lines hidden (view full) ---

2152 return -ENOMEM;
2153
2154 ret = r5c_recovery_flush_log(log, &ctx);
2155 __free_page(ctx.meta_page);
2156
2157 if (ret)
2158 return ret;
2159
2062 *
2063 * Then we can safely start the state machine. If failure happens from this
2064 * point on, the recovery will start from new log->last_checkpoint.
2065 */
2066static int
2067r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
2068 struct r5l_recovery_ctx *ctx)
2069{

--- 82 unchanged lines hidden (view full) ---

2152 return -ENOMEM;
2153
2154 ret = r5c_recovery_flush_log(log, &ctx);
2155 __free_page(ctx.meta_page);
2156
2157 if (ret)
2158 return ret;
2159
2160 pos = ctx.pos;
2161 ctx.seq += 10;
2160 pos = ctx.pos;
2161 ctx.seq += 10000;
2162
2163 if (ctx.data_only_stripes == 0) {
2164 log->next_checkpoint = ctx.pos;
2165 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
2166 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
2167 } else {
2168 sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
2169 log->next_checkpoint = sh->log_start;

--- 503 unchanged lines hidden ---
2162
2163 if (ctx.data_only_stripes == 0) {
2164 log->next_checkpoint = ctx.pos;
2165 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
2166 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
2167 } else {
2168 sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
2169 log->next_checkpoint = sh->log_start;

--- 503 unchanged lines hidden ---