raid5-cache.c (5c88f403a5d2bd75911c6faaacc9bea97ac7d121) | raid5-cache.c (3c6edc66085e1d895a698c572bbfaf4d57fdb771) |
---|---|
1/* 2 * Copyright (C) 2015 Shaohua Li <shli@fb.com> 3 * Copyright (C) 2016 Song Liu <songliubraving@fb.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * --- 1990 unchanged lines hidden (view full) --- 1999 * log will start here. but we can't let superblock point to last valid 2000 * meta block. The log might look like: 2001 * | meta 1| meta 2| meta 3| 2002 * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If 2003 * superblock points to meta 1, we write a new valid meta 2n. if crash 2004 * happens again, new recovery will start from meta 1. Since meta 2n is 2005 * valid now, recovery will think meta 3 is valid, which is wrong. 2006 * The solution is we create a new meta in meta2 with its seq == meta | 1/* 2 * Copyright (C) 2015 Shaohua Li <shli@fb.com> 3 * Copyright (C) 2016 Song Liu <songliubraving@fb.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * --- 1990 unchanged lines hidden (view full) --- 1999 * log will start here. but we can't let superblock point to last valid 2000 * meta block. The log might look like: 2001 * | meta 1| meta 2| meta 3| 2002 * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If 2003 * superblock points to meta 1, we write a new valid meta 2n. if crash 2004 * happens again, new recovery will start from meta 1. Since meta 2n is 2005 * valid now, recovery will think meta 3 is valid, which is wrong. 2006 * The solution is we create a new meta in meta2 with its seq == meta |
2007 * 1's seq + 10 and let superblock points to meta2. The same recovery will 2008 * not think meta 3 is a valid meta, because its seq doesn't match | 2007 * 1's seq + 10000 and let superblock points to meta2. The same recovery 2008 * will not think meta 3 is a valid meta, because its seq doesn't match |
2009 */ 2010 2011/* 2012 * Before recovery, the log looks like the following 2013 * 2014 * --------------------------------------------- 2015 * | valid log | invalid log | 2016 * --------------------------------------------- --- 13 unchanged lines hidden (view full) --- 2030 * From this point, we need to increase seq number by 10 to avoid 2031 * confusing next recovery. 2032 * 2033 * --------------------------------------------- 2034 * | valid log | invalid log | 2035 * --------------------------------------------- 2036 * ^ ^ 2037 * |- log->last_checkpoint |- ctx->pos+1 | 2009 */ 2010 2011/* 2012 * Before recovery, the log looks like the following 2013 * 2014 * --------------------------------------------- 2015 * | valid log | invalid log | 2016 * --------------------------------------------- --- 13 unchanged lines hidden (view full) --- 2030 * From this point, we need to increase seq number by 10000 to avoid 2031 * confusing next recovery. 2032 * 2033 * --------------------------------------------- 2034 * | valid log | invalid log | 2035 * --------------------------------------------- 2036 * ^ ^ 2037 * |- log->last_checkpoint |- ctx->pos+1 |
2038 * |- log->last_cp_seq |- ctx->seq+11 | 2038 * |- log->last_cp_seq |- ctx->seq+10001 |
2039 * 2040 * However, it is not safe to start the state machine yet, because data only 2041 * parities are not yet secured in RAID. To save these data only parities, we 2042 * rewrite them from seq+11. 2043 * 2044 * ----------------------------------------------------------------- 2045 * | valid log | data only stripes | invalid log | 2046 * ----------------------------------------------------------------- 2047 * ^ ^ 2048 * |- log->last_checkpoint |- ctx->pos+n | 2039 * 2040 * However, it is not safe to start the state machine yet, because data only 2041 * parities are not yet secured in RAID. To save these data only parities, we 2042 * rewrite them from seq+10001. 2043 * 2044 * ----------------------------------------------------------------- 2045 * | valid log | data only stripes | invalid log | 2046 * ----------------------------------------------------------------- 2047 * ^ ^ 2048 * |- log->last_checkpoint |- ctx->pos+n |
2049 * |- log->last_cp_seq |- ctx->seq+10+n | 2049 * |- log->last_cp_seq |- ctx->seq+10000+n |
2050 * 2051 * If failure happens again during this process, the recovery can safely start 2052 * again from log->last_checkpoint. 2053 * 2054 * Once data only stripes are rewritten to journal, we move log_tail 2055 * 2056 * ----------------------------------------------------------------- 2057 * | old log | data only stripes | invalid log | 2058 * ----------------------------------------------------------------- 2059 * ^ ^ 2060 * |- log->last_checkpoint |- ctx->pos+n | 2050 * 2051 * If failure happens again during this process, the recovery can safely start 2052 * again from log->last_checkpoint. 2053 * 2054 * Once data only stripes are rewritten to journal, we move log_tail 2055 * 2056 * ----------------------------------------------------------------- 2057 * | old log | data only stripes | invalid log | 2058 * ----------------------------------------------------------------- 2059 * ^ ^ 2060 * |- log->last_checkpoint |- ctx->pos+n |
2061 * |- log->last_cp_seq |- ctx->seq+10+n | 2061 * |- log->last_cp_seq |- ctx->seq+10000+n |
2062 * 2063 * Then we can safely start the state machine. If failure happens from this 2064 * point on, the recovery will start from new log->last_checkpoint. 2065 */ 2066static int 2067r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, 2068 struct r5l_recovery_ctx *ctx) 2069{ --- 82 unchanged lines hidden (view full) --- 2152 return -ENOMEM; 2153 2154 ret = r5c_recovery_flush_log(log, &ctx); 2155 __free_page(ctx.meta_page); 2156 2157 if (ret) 2158 return ret; 2159 | 2062 * 2063 * Then we can safely start the state machine. If failure happens from this 2064 * point on, the recovery will start from new log->last_checkpoint. 2065 */ 2066static int 2067r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, 2068 struct r5l_recovery_ctx *ctx) 2069{ --- 82 unchanged lines hidden (view full) --- 2152 return -ENOMEM; 2153 2154 ret = r5c_recovery_flush_log(log, &ctx); 2155 __free_page(ctx.meta_page); 2156 2157 if (ret) 2158 return ret; 2159 |
2160 pos = ctx.pos; 2161 ctx.seq += 10; | 2160 pos = ctx.pos; 2161 ctx.seq += 10000; |
2162 2163 if (ctx.data_only_stripes == 0) { 2164 log->next_checkpoint = ctx.pos; 2165 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); 2166 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); 2167 } else { 2168 sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru); 2169 log->next_checkpoint = sh->log_start; --- 503 unchanged lines hidden --- | 2162 2163 if (ctx.data_only_stripes == 0) { 2164 log->next_checkpoint = ctx.pos; 2165 r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); 2166 ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); 2167 } else { 2168 sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru); 2169 log->next_checkpoint = sh->log_start; --- 503 unchanged lines hidden --- |