148debafeSMikulas Patocka // SPDX-License-Identifier: GPL-2.0 248debafeSMikulas Patocka /* 348debafeSMikulas Patocka * Copyright (C) 2018 Red Hat. All rights reserved. 448debafeSMikulas Patocka * 548debafeSMikulas Patocka * This file is released under the GPL. 648debafeSMikulas Patocka */ 748debafeSMikulas Patocka 848debafeSMikulas Patocka #include <linux/device-mapper.h> 948debafeSMikulas Patocka #include <linux/module.h> 1048debafeSMikulas Patocka #include <linux/init.h> 1148debafeSMikulas Patocka #include <linux/vmalloc.h> 1248debafeSMikulas Patocka #include <linux/kthread.h> 1348debafeSMikulas Patocka #include <linux/dm-io.h> 1448debafeSMikulas Patocka #include <linux/dm-kcopyd.h> 1548debafeSMikulas Patocka #include <linux/dax.h> 1648debafeSMikulas Patocka #include <linux/pfn_t.h> 1748debafeSMikulas Patocka #include <linux/libnvdimm.h> 1848debafeSMikulas Patocka 1948debafeSMikulas Patocka #define DM_MSG_PREFIX "writecache" 2048debafeSMikulas Patocka 2148debafeSMikulas Patocka #define HIGH_WATERMARK 50 2248debafeSMikulas Patocka #define LOW_WATERMARK 45 2348debafeSMikulas Patocka #define MAX_WRITEBACK_JOBS 0 2448debafeSMikulas Patocka #define ENDIO_LATENCY 16 2548debafeSMikulas Patocka #define WRITEBACK_LATENCY 64 2648debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_SSD 65536 2748debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_PMEM 64 2848debafeSMikulas Patocka #define AUTOCOMMIT_MSEC 1000 293923d485SMikulas Patocka #define MAX_AGE_DIV 16 303923d485SMikulas Patocka #define MAX_AGE_UNSPECIFIED -1UL 3148debafeSMikulas Patocka 3248debafeSMikulas Patocka #define BITMAP_GRANULARITY 65536 3348debafeSMikulas Patocka #if BITMAP_GRANULARITY < PAGE_SIZE 3448debafeSMikulas Patocka #undef BITMAP_GRANULARITY 3548debafeSMikulas Patocka #define BITMAP_GRANULARITY PAGE_SIZE 3648debafeSMikulas Patocka #endif 3748debafeSMikulas Patocka 3848debafeSMikulas Patocka #if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER) 3948debafeSMikulas Patocka #define DM_WRITECACHE_HAS_PMEM 4048debafeSMikulas Patocka #endif 4148debafeSMikulas Patocka 4248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 4348debafeSMikulas Patocka #define pmem_assign(dest, src) \ 4448debafeSMikulas Patocka do { \ 4548debafeSMikulas Patocka typeof(dest) uniq = (src); \ 4648debafeSMikulas Patocka memcpy_flushcache(&(dest), &uniq, sizeof(dest)); \ 4748debafeSMikulas Patocka } while (0) 4848debafeSMikulas Patocka #else 4948debafeSMikulas Patocka #define pmem_assign(dest, src) ((dest) = (src)) 5048debafeSMikulas Patocka #endif 5148debafeSMikulas Patocka 5248debafeSMikulas Patocka #if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) 5348debafeSMikulas Patocka #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 5448debafeSMikulas Patocka #endif 5548debafeSMikulas Patocka 5648debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_MAGIC 0x23489321 5748debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_VERSION 1 5848debafeSMikulas Patocka 5948debafeSMikulas Patocka struct wc_memory_entry { 6048debafeSMikulas Patocka __le64 original_sector; 6148debafeSMikulas Patocka __le64 seq_count; 6248debafeSMikulas Patocka }; 6348debafeSMikulas Patocka 6448debafeSMikulas Patocka struct wc_memory_superblock { 6548debafeSMikulas Patocka union { 6648debafeSMikulas Patocka struct { 6748debafeSMikulas Patocka __le32 magic; 6848debafeSMikulas Patocka __le32 version; 6948debafeSMikulas Patocka __le32 block_size; 7048debafeSMikulas Patocka __le32 pad; 7148debafeSMikulas Patocka __le64 n_blocks; 7248debafeSMikulas Patocka __le64 seq_count; 7348debafeSMikulas Patocka }; 7448debafeSMikulas Patocka __le64 padding[8]; 7548debafeSMikulas Patocka }; 7648debafeSMikulas Patocka struct wc_memory_entry entries[0]; 7748debafeSMikulas Patocka }; 7848debafeSMikulas Patocka 7948debafeSMikulas Patocka struct wc_entry { 8048debafeSMikulas Patocka struct rb_node rb_node; 8148debafeSMikulas Patocka struct list_head lru; 8248debafeSMikulas Patocka unsigned short wc_list_contiguous; 8348debafeSMikulas Patocka bool write_in_progress 8448debafeSMikulas Patocka #if BITS_PER_LONG == 64 8548debafeSMikulas Patocka :1 8648debafeSMikulas Patocka #endif 8748debafeSMikulas Patocka ; 8848debafeSMikulas Patocka unsigned long index 8948debafeSMikulas Patocka #if BITS_PER_LONG == 64 9048debafeSMikulas Patocka :47 9148debafeSMikulas Patocka #endif 9248debafeSMikulas Patocka ; 933923d485SMikulas Patocka unsigned long age; 9448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 9548debafeSMikulas Patocka uint64_t original_sector; 9648debafeSMikulas Patocka uint64_t seq_count; 9748debafeSMikulas Patocka #endif 9848debafeSMikulas Patocka }; 9948debafeSMikulas Patocka 10048debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 10148debafeSMikulas Patocka #define WC_MODE_PMEM(wc) ((wc)->pmem_mode) 10248debafeSMikulas Patocka #define WC_MODE_FUA(wc) ((wc)->writeback_fua) 10348debafeSMikulas Patocka #else 10448debafeSMikulas Patocka #define WC_MODE_PMEM(wc) false 10548debafeSMikulas Patocka #define WC_MODE_FUA(wc) false 10648debafeSMikulas Patocka #endif 10748debafeSMikulas Patocka #define WC_MODE_SORT_FREELIST(wc) (!WC_MODE_PMEM(wc)) 10848debafeSMikulas Patocka 10948debafeSMikulas Patocka struct dm_writecache { 11048debafeSMikulas Patocka struct mutex lock; 11148debafeSMikulas Patocka struct list_head lru; 11248debafeSMikulas Patocka union { 11348debafeSMikulas Patocka struct list_head freelist; 11448debafeSMikulas Patocka struct { 11548debafeSMikulas Patocka struct rb_root freetree; 11648debafeSMikulas Patocka struct wc_entry *current_free; 11748debafeSMikulas Patocka }; 11848debafeSMikulas Patocka }; 11948debafeSMikulas Patocka struct rb_root tree; 12048debafeSMikulas Patocka 12148debafeSMikulas Patocka size_t freelist_size; 12248debafeSMikulas Patocka size_t writeback_size; 12348debafeSMikulas Patocka size_t freelist_high_watermark; 12448debafeSMikulas Patocka size_t freelist_low_watermark; 1253923d485SMikulas Patocka unsigned long max_age; 12648debafeSMikulas Patocka 12748debafeSMikulas Patocka unsigned uncommitted_blocks; 12848debafeSMikulas Patocka unsigned autocommit_blocks; 12948debafeSMikulas Patocka unsigned max_writeback_jobs; 13048debafeSMikulas Patocka 13148debafeSMikulas Patocka int error; 13248debafeSMikulas Patocka 13348debafeSMikulas Patocka unsigned long autocommit_jiffies; 13448debafeSMikulas Patocka struct timer_list autocommit_timer; 13548debafeSMikulas Patocka struct wait_queue_head freelist_wait; 13648debafeSMikulas Patocka 1373923d485SMikulas Patocka struct timer_list max_age_timer; 1383923d485SMikulas Patocka 13948debafeSMikulas Patocka atomic_t bio_in_progress[2]; 14048debafeSMikulas Patocka struct wait_queue_head bio_in_progress_wait[2]; 14148debafeSMikulas Patocka 14248debafeSMikulas Patocka struct dm_target *ti; 14348debafeSMikulas Patocka struct dm_dev *dev; 14448debafeSMikulas Patocka struct dm_dev *ssd_dev; 145d284f824SMikulas Patocka sector_t start_sector; 14648debafeSMikulas Patocka void *memory_map; 14748debafeSMikulas Patocka uint64_t memory_map_size; 14848debafeSMikulas Patocka size_t metadata_sectors; 14948debafeSMikulas Patocka size_t n_blocks; 15048debafeSMikulas Patocka uint64_t seq_count; 15148debafeSMikulas Patocka void *block_start; 15248debafeSMikulas Patocka struct wc_entry *entries; 15348debafeSMikulas Patocka unsigned block_size; 15448debafeSMikulas Patocka unsigned char block_size_bits; 15548debafeSMikulas Patocka 15648debafeSMikulas Patocka bool pmem_mode:1; 15748debafeSMikulas Patocka bool writeback_fua:1; 15848debafeSMikulas Patocka 15948debafeSMikulas Patocka bool overwrote_committed:1; 16048debafeSMikulas Patocka bool memory_vmapped:1; 16148debafeSMikulas Patocka 16248debafeSMikulas Patocka bool high_wm_percent_set:1; 16348debafeSMikulas Patocka bool low_wm_percent_set:1; 16448debafeSMikulas Patocka bool max_writeback_jobs_set:1; 16548debafeSMikulas Patocka bool autocommit_blocks_set:1; 16648debafeSMikulas Patocka bool autocommit_time_set:1; 16748debafeSMikulas Patocka bool writeback_fua_set:1; 16848debafeSMikulas Patocka bool flush_on_suspend:1; 16993de44ebSMikulas Patocka bool cleaner:1; 17048debafeSMikulas Patocka 17148debafeSMikulas Patocka unsigned writeback_all; 17248debafeSMikulas Patocka struct workqueue_struct *writeback_wq; 17348debafeSMikulas Patocka struct work_struct writeback_work; 17448debafeSMikulas Patocka struct work_struct flush_work; 17548debafeSMikulas Patocka 17648debafeSMikulas Patocka struct dm_io_client *dm_io; 17748debafeSMikulas Patocka 17848debafeSMikulas Patocka raw_spinlock_t endio_list_lock; 17948debafeSMikulas Patocka struct list_head endio_list; 18048debafeSMikulas Patocka struct task_struct *endio_thread; 18148debafeSMikulas Patocka 18248debafeSMikulas Patocka struct task_struct *flush_thread; 18348debafeSMikulas Patocka struct bio_list flush_list; 18448debafeSMikulas Patocka 18548debafeSMikulas Patocka struct dm_kcopyd_client *dm_kcopyd; 18648debafeSMikulas Patocka unsigned long *dirty_bitmap; 18748debafeSMikulas Patocka unsigned dirty_bitmap_size; 18848debafeSMikulas Patocka 18948debafeSMikulas Patocka struct bio_set bio_set; 19048debafeSMikulas Patocka mempool_t copy_pool; 19148debafeSMikulas Patocka }; 19248debafeSMikulas Patocka 19348debafeSMikulas Patocka #define WB_LIST_INLINE 16 19448debafeSMikulas Patocka 19548debafeSMikulas Patocka struct writeback_struct { 19648debafeSMikulas Patocka struct list_head endio_entry; 19748debafeSMikulas Patocka struct dm_writecache *wc; 19848debafeSMikulas Patocka struct wc_entry **wc_list; 19948debafeSMikulas Patocka unsigned wc_list_n; 20048debafeSMikulas Patocka struct wc_entry *wc_list_inline[WB_LIST_INLINE]; 20148debafeSMikulas Patocka struct bio bio; 20248debafeSMikulas Patocka }; 20348debafeSMikulas Patocka 20448debafeSMikulas Patocka struct copy_struct { 20548debafeSMikulas Patocka struct list_head endio_entry; 20648debafeSMikulas Patocka struct dm_writecache *wc; 20748debafeSMikulas Patocka struct wc_entry *e; 20848debafeSMikulas Patocka unsigned n_entries; 20948debafeSMikulas Patocka int error; 21048debafeSMikulas Patocka }; 21148debafeSMikulas Patocka 21248debafeSMikulas Patocka DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle, 21348debafeSMikulas Patocka "A percentage of time allocated for data copying"); 21448debafeSMikulas Patocka 21548debafeSMikulas Patocka static void wc_lock(struct dm_writecache *wc) 21648debafeSMikulas Patocka { 21748debafeSMikulas Patocka mutex_lock(&wc->lock); 21848debafeSMikulas Patocka } 21948debafeSMikulas Patocka 22048debafeSMikulas Patocka static void wc_unlock(struct dm_writecache *wc) 22148debafeSMikulas Patocka { 22248debafeSMikulas Patocka mutex_unlock(&wc->lock); 22348debafeSMikulas Patocka } 22448debafeSMikulas Patocka 22548debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 22648debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 22748debafeSMikulas Patocka { 22848debafeSMikulas Patocka int r; 22948debafeSMikulas Patocka loff_t s; 23048debafeSMikulas Patocka long p, da; 23148debafeSMikulas Patocka pfn_t pfn; 23248debafeSMikulas Patocka int id; 23348debafeSMikulas Patocka struct page **pages; 23448debafeSMikulas Patocka 23548debafeSMikulas Patocka wc->memory_vmapped = false; 23648debafeSMikulas Patocka 23748debafeSMikulas Patocka if (!wc->ssd_dev->dax_dev) { 23848debafeSMikulas Patocka r = -EOPNOTSUPP; 23948debafeSMikulas Patocka goto err1; 24048debafeSMikulas Patocka } 24148debafeSMikulas Patocka s = wc->memory_map_size; 24248debafeSMikulas Patocka p = s >> PAGE_SHIFT; 24348debafeSMikulas Patocka if (!p) { 24448debafeSMikulas Patocka r = -EINVAL; 24548debafeSMikulas Patocka goto err1; 24648debafeSMikulas Patocka } 24748debafeSMikulas Patocka if (p != s >> PAGE_SHIFT) { 24848debafeSMikulas Patocka r = -EOVERFLOW; 24948debafeSMikulas Patocka goto err1; 25048debafeSMikulas Patocka } 25148debafeSMikulas Patocka 25248debafeSMikulas Patocka id = dax_read_lock(); 25348debafeSMikulas Patocka 25448debafeSMikulas Patocka da = dax_direct_access(wc->ssd_dev->dax_dev, 0, p, &wc->memory_map, &pfn); 25548debafeSMikulas Patocka if (da < 0) { 25648debafeSMikulas Patocka wc->memory_map = NULL; 25748debafeSMikulas Patocka r = da; 25848debafeSMikulas Patocka goto err2; 25948debafeSMikulas Patocka } 26048debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 26148debafeSMikulas Patocka wc->memory_map = NULL; 26248debafeSMikulas Patocka r = -EOPNOTSUPP; 26348debafeSMikulas Patocka goto err2; 26448debafeSMikulas Patocka } 26548debafeSMikulas Patocka if (da != p) { 26648debafeSMikulas Patocka long i; 26748debafeSMikulas Patocka wc->memory_map = NULL; 26850a7d3baSKees Cook pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); 26948debafeSMikulas Patocka if (!pages) { 27048debafeSMikulas Patocka r = -ENOMEM; 27148debafeSMikulas Patocka goto err2; 27248debafeSMikulas Patocka } 27348debafeSMikulas Patocka i = 0; 27448debafeSMikulas Patocka do { 27548debafeSMikulas Patocka long daa; 27648debafeSMikulas Patocka daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i, 277f742267aSHuaisheng Ye NULL, &pfn); 27848debafeSMikulas Patocka if (daa <= 0) { 27948debafeSMikulas Patocka r = daa ? daa : -EINVAL; 28048debafeSMikulas Patocka goto err3; 28148debafeSMikulas Patocka } 28248debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 28348debafeSMikulas Patocka r = -EOPNOTSUPP; 28448debafeSMikulas Patocka goto err3; 28548debafeSMikulas Patocka } 28648debafeSMikulas Patocka while (daa-- && i < p) { 28748debafeSMikulas Patocka pages[i++] = pfn_t_to_page(pfn); 28848debafeSMikulas Patocka pfn.val++; 28948debafeSMikulas Patocka } 29048debafeSMikulas Patocka } while (i < p); 29148debafeSMikulas Patocka wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); 29248debafeSMikulas Patocka if (!wc->memory_map) { 29348debafeSMikulas Patocka r = -ENOMEM; 29448debafeSMikulas Patocka goto err3; 29548debafeSMikulas Patocka } 29648debafeSMikulas Patocka kvfree(pages); 29748debafeSMikulas Patocka wc->memory_vmapped = true; 29848debafeSMikulas Patocka } 29948debafeSMikulas Patocka 30048debafeSMikulas Patocka dax_read_unlock(id); 301d284f824SMikulas Patocka 302d284f824SMikulas Patocka wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT; 303d284f824SMikulas Patocka wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT; 304d284f824SMikulas Patocka 30548debafeSMikulas Patocka return 0; 30648debafeSMikulas Patocka err3: 30748debafeSMikulas Patocka kvfree(pages); 30848debafeSMikulas Patocka err2: 30948debafeSMikulas Patocka dax_read_unlock(id); 31048debafeSMikulas Patocka err1: 31148debafeSMikulas Patocka return r; 31248debafeSMikulas Patocka } 31348debafeSMikulas Patocka #else 31448debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 31548debafeSMikulas Patocka { 31648debafeSMikulas Patocka BUG(); 31748debafeSMikulas Patocka } 31848debafeSMikulas Patocka #endif 31948debafeSMikulas Patocka 32048debafeSMikulas Patocka static void persistent_memory_release(struct dm_writecache *wc) 32148debafeSMikulas Patocka { 32248debafeSMikulas Patocka if (wc->memory_vmapped) 323d284f824SMikulas Patocka vunmap(wc->memory_map - ((size_t)wc->start_sector << SECTOR_SHIFT)); 32448debafeSMikulas Patocka } 32548debafeSMikulas Patocka 32648debafeSMikulas Patocka static struct page *persistent_memory_page(void *addr) 32748debafeSMikulas Patocka { 32848debafeSMikulas Patocka if (is_vmalloc_addr(addr)) 32948debafeSMikulas Patocka return vmalloc_to_page(addr); 33048debafeSMikulas Patocka else 33148debafeSMikulas Patocka return virt_to_page(addr); 33248debafeSMikulas Patocka } 33348debafeSMikulas Patocka 33448debafeSMikulas Patocka static unsigned persistent_memory_page_offset(void *addr) 33548debafeSMikulas Patocka { 33648debafeSMikulas Patocka return (unsigned long)addr & (PAGE_SIZE - 1); 33748debafeSMikulas Patocka } 33848debafeSMikulas Patocka 33948debafeSMikulas Patocka static void persistent_memory_flush_cache(void *ptr, size_t size) 34048debafeSMikulas Patocka { 34148debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 34248debafeSMikulas Patocka flush_kernel_vmap_range(ptr, size); 34348debafeSMikulas Patocka } 34448debafeSMikulas Patocka 34548debafeSMikulas Patocka static void persistent_memory_invalidate_cache(void *ptr, size_t size) 34648debafeSMikulas Patocka { 34748debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 34848debafeSMikulas Patocka invalidate_kernel_vmap_range(ptr, size); 34948debafeSMikulas Patocka } 35048debafeSMikulas Patocka 35148debafeSMikulas Patocka static struct wc_memory_superblock *sb(struct dm_writecache *wc) 35248debafeSMikulas Patocka { 35348debafeSMikulas Patocka return wc->memory_map; 35448debafeSMikulas Patocka } 35548debafeSMikulas Patocka 35648debafeSMikulas Patocka static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e) 35748debafeSMikulas Patocka { 35848debafeSMikulas Patocka return &sb(wc)->entries[e->index]; 35948debafeSMikulas Patocka } 36048debafeSMikulas Patocka 36148debafeSMikulas Patocka static void *memory_data(struct dm_writecache *wc, struct wc_entry *e) 36248debafeSMikulas Patocka { 36348debafeSMikulas Patocka return (char *)wc->block_start + (e->index << wc->block_size_bits); 36448debafeSMikulas Patocka } 36548debafeSMikulas Patocka 36648debafeSMikulas Patocka static sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e) 36748debafeSMikulas Patocka { 368d284f824SMikulas Patocka return wc->start_sector + wc->metadata_sectors + 36948debafeSMikulas Patocka ((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT)); 37048debafeSMikulas Patocka } 37148debafeSMikulas Patocka 37248debafeSMikulas Patocka static uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e) 37348debafeSMikulas Patocka { 37448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 37548debafeSMikulas Patocka return e->original_sector; 37648debafeSMikulas Patocka #else 37748debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->original_sector); 37848debafeSMikulas Patocka #endif 37948debafeSMikulas Patocka } 38048debafeSMikulas Patocka 38148debafeSMikulas Patocka static uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e) 38248debafeSMikulas Patocka { 38348debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 38448debafeSMikulas Patocka return e->seq_count; 38548debafeSMikulas Patocka #else 38648debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->seq_count); 38748debafeSMikulas Patocka #endif 38848debafeSMikulas Patocka } 38948debafeSMikulas Patocka 39048debafeSMikulas Patocka static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e) 39148debafeSMikulas Patocka { 39248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 39348debafeSMikulas Patocka e->seq_count = -1; 39448debafeSMikulas Patocka #endif 39548debafeSMikulas Patocka pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1)); 39648debafeSMikulas Patocka } 39748debafeSMikulas Patocka 39848debafeSMikulas Patocka static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e, 39948debafeSMikulas Patocka uint64_t original_sector, uint64_t seq_count) 40048debafeSMikulas Patocka { 40148debafeSMikulas Patocka struct wc_memory_entry me; 40248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 40348debafeSMikulas Patocka e->original_sector = original_sector; 40448debafeSMikulas Patocka e->seq_count = seq_count; 40548debafeSMikulas Patocka #endif 40648debafeSMikulas Patocka me.original_sector = cpu_to_le64(original_sector); 40748debafeSMikulas Patocka me.seq_count = cpu_to_le64(seq_count); 40848debafeSMikulas Patocka pmem_assign(*memory_entry(wc, e), me); 40948debafeSMikulas Patocka } 41048debafeSMikulas Patocka 41148debafeSMikulas Patocka #define writecache_error(wc, err, msg, arg...) \ 41248debafeSMikulas Patocka do { \ 41348debafeSMikulas Patocka if (!cmpxchg(&(wc)->error, 0, err)) \ 41448debafeSMikulas Patocka DMERR(msg, ##arg); \ 41548debafeSMikulas Patocka wake_up(&(wc)->freelist_wait); \ 41648debafeSMikulas Patocka } while (0) 41748debafeSMikulas Patocka 41848debafeSMikulas Patocka #define writecache_has_error(wc) (unlikely(READ_ONCE((wc)->error))) 41948debafeSMikulas Patocka 42048debafeSMikulas Patocka static void writecache_flush_all_metadata(struct dm_writecache *wc) 42148debafeSMikulas Patocka { 42248debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42348debafeSMikulas Patocka memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size); 42448debafeSMikulas Patocka } 42548debafeSMikulas Patocka 42648debafeSMikulas Patocka static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size) 42748debafeSMikulas Patocka { 42848debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42948debafeSMikulas Patocka __set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY, 43048debafeSMikulas Patocka wc->dirty_bitmap); 43148debafeSMikulas Patocka } 43248debafeSMikulas Patocka 43348debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev); 43448debafeSMikulas Patocka 43548debafeSMikulas Patocka struct io_notify { 43648debafeSMikulas Patocka struct dm_writecache *wc; 43748debafeSMikulas Patocka struct completion c; 43848debafeSMikulas Patocka atomic_t count; 43948debafeSMikulas Patocka }; 44048debafeSMikulas Patocka 44148debafeSMikulas Patocka static void writecache_notify_io(unsigned long error, void *context) 44248debafeSMikulas Patocka { 44348debafeSMikulas Patocka struct io_notify *endio = context; 44448debafeSMikulas Patocka 44548debafeSMikulas Patocka if (unlikely(error != 0)) 44648debafeSMikulas Patocka writecache_error(endio->wc, -EIO, "error writing metadata"); 44748debafeSMikulas Patocka BUG_ON(atomic_read(&endio->count) <= 0); 44848debafeSMikulas Patocka if (atomic_dec_and_test(&endio->count)) 44948debafeSMikulas Patocka complete(&endio->c); 45048debafeSMikulas Patocka } 45148debafeSMikulas Patocka 452aa950920SMikulas Patocka static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) 453aa950920SMikulas Patocka { 454aa950920SMikulas Patocka wait_event(wc->bio_in_progress_wait[direction], 455aa950920SMikulas Patocka !atomic_read(&wc->bio_in_progress[direction])); 456aa950920SMikulas Patocka } 457aa950920SMikulas Patocka 458aa950920SMikulas Patocka static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 45948debafeSMikulas Patocka { 46048debafeSMikulas Patocka struct dm_io_region region; 46148debafeSMikulas Patocka struct dm_io_request req; 46248debafeSMikulas Patocka struct io_notify endio = { 46348debafeSMikulas Patocka wc, 46448debafeSMikulas Patocka COMPLETION_INITIALIZER_ONSTACK(endio.c), 46548debafeSMikulas Patocka ATOMIC_INIT(1), 46648debafeSMikulas Patocka }; 4671e1132eaSMikulas Patocka unsigned bitmap_bits = wc->dirty_bitmap_size * 8; 46848debafeSMikulas Patocka unsigned i = 0; 46948debafeSMikulas Patocka 47048debafeSMikulas Patocka while (1) { 47148debafeSMikulas Patocka unsigned j; 47248debafeSMikulas Patocka i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i); 47348debafeSMikulas Patocka if (unlikely(i == bitmap_bits)) 47448debafeSMikulas Patocka break; 47548debafeSMikulas Patocka j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i); 47648debafeSMikulas Patocka 47748debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 47848debafeSMikulas Patocka region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 47948debafeSMikulas Patocka region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 48048debafeSMikulas Patocka 48148debafeSMikulas Patocka if (unlikely(region.sector >= wc->metadata_sectors)) 48248debafeSMikulas Patocka break; 48348debafeSMikulas Patocka if (unlikely(region.sector + region.count > wc->metadata_sectors)) 48448debafeSMikulas Patocka region.count = wc->metadata_sectors - region.sector; 48548debafeSMikulas Patocka 486d284f824SMikulas Patocka region.sector += wc->start_sector; 48748debafeSMikulas Patocka atomic_inc(&endio.count); 48848debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 48948debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 49048debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 49148debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; 49248debafeSMikulas Patocka req.client = wc->dm_io; 49348debafeSMikulas Patocka req.notify.fn = writecache_notify_io; 49448debafeSMikulas Patocka req.notify.context = &endio; 49548debafeSMikulas Patocka 49648debafeSMikulas Patocka /* writing via async dm-io (implied by notify.fn above) won't return an error */ 49748debafeSMikulas Patocka (void) dm_io(&req, 1, ®ion, NULL); 49848debafeSMikulas Patocka i = j; 49948debafeSMikulas Patocka } 50048debafeSMikulas Patocka 50148debafeSMikulas Patocka writecache_notify_io(0, &endio); 50248debafeSMikulas Patocka wait_for_completion_io(&endio.c); 50348debafeSMikulas Patocka 504aa950920SMikulas Patocka if (wait_for_ios) 505aa950920SMikulas Patocka writecache_wait_for_ios(wc, WRITE); 506aa950920SMikulas Patocka 50748debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 50848debafeSMikulas Patocka 50948debafeSMikulas Patocka memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); 51048debafeSMikulas Patocka } 51148debafeSMikulas Patocka 512dc8a01aeSMikulas Patocka static void ssd_commit_superblock(struct dm_writecache *wc) 513dc8a01aeSMikulas Patocka { 514dc8a01aeSMikulas Patocka int r; 515dc8a01aeSMikulas Patocka struct dm_io_region region; 516dc8a01aeSMikulas Patocka struct dm_io_request req; 517dc8a01aeSMikulas Patocka 518dc8a01aeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 519dc8a01aeSMikulas Patocka region.sector = 0; 520dc8a01aeSMikulas Patocka region.count = PAGE_SIZE; 521dc8a01aeSMikulas Patocka 522dc8a01aeSMikulas Patocka if (unlikely(region.sector + region.count > wc->metadata_sectors)) 523dc8a01aeSMikulas Patocka region.count = wc->metadata_sectors - region.sector; 524dc8a01aeSMikulas Patocka 525dc8a01aeSMikulas Patocka region.sector += wc->start_sector; 526dc8a01aeSMikulas Patocka 527dc8a01aeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 528dc8a01aeSMikulas Patocka req.bi_op_flags = REQ_SYNC | REQ_FUA; 529dc8a01aeSMikulas Patocka req.mem.type = DM_IO_VMA; 530dc8a01aeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map; 531dc8a01aeSMikulas Patocka req.client = wc->dm_io; 532dc8a01aeSMikulas Patocka req.notify.fn = NULL; 533dc8a01aeSMikulas Patocka req.notify.context = NULL; 534dc8a01aeSMikulas Patocka 535dc8a01aeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 536dc8a01aeSMikulas Patocka if (unlikely(r)) 537dc8a01aeSMikulas Patocka writecache_error(wc, r, "error writing superblock"); 538dc8a01aeSMikulas Patocka } 539dc8a01aeSMikulas Patocka 540aa950920SMikulas Patocka static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 54148debafeSMikulas Patocka { 54248debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 54348debafeSMikulas Patocka wmb(); 54448debafeSMikulas Patocka else 545aa950920SMikulas Patocka ssd_commit_flushed(wc, wait_for_ios); 54648debafeSMikulas Patocka } 54748debafeSMikulas Patocka 54848debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) 54948debafeSMikulas Patocka { 55048debafeSMikulas Patocka int r; 55148debafeSMikulas Patocka struct dm_io_region region; 55248debafeSMikulas Patocka struct dm_io_request req; 55348debafeSMikulas Patocka 55448debafeSMikulas Patocka region.bdev = dev->bdev; 55548debafeSMikulas Patocka region.sector = 0; 55648debafeSMikulas Patocka region.count = 0; 55748debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 55848debafeSMikulas Patocka req.bi_op_flags = REQ_PREFLUSH; 55948debafeSMikulas Patocka req.mem.type = DM_IO_KMEM; 56048debafeSMikulas Patocka req.mem.ptr.addr = NULL; 56148debafeSMikulas Patocka req.client = wc->dm_io; 56248debafeSMikulas Patocka req.notify.fn = NULL; 56348debafeSMikulas Patocka 56448debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 56548debafeSMikulas Patocka if (unlikely(r)) 56648debafeSMikulas Patocka writecache_error(wc, r, "error flushing metadata: %d", r); 56748debafeSMikulas Patocka } 56848debafeSMikulas Patocka 56948debafeSMikulas Patocka #define WFE_RETURN_FOLLOWING 1 57048debafeSMikulas Patocka #define WFE_LOWEST_SEQ 2 57148debafeSMikulas Patocka 57248debafeSMikulas Patocka static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, 57348debafeSMikulas Patocka uint64_t block, int flags) 57448debafeSMikulas Patocka { 57548debafeSMikulas Patocka struct wc_entry *e; 57648debafeSMikulas Patocka struct rb_node *node = wc->tree.rb_node; 57748debafeSMikulas Patocka 57848debafeSMikulas Patocka if (unlikely(!node)) 57948debafeSMikulas Patocka return NULL; 58048debafeSMikulas Patocka 58148debafeSMikulas Patocka while (1) { 58248debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 58348debafeSMikulas Patocka if (read_original_sector(wc, e) == block) 58448debafeSMikulas Patocka break; 585f8011d33SMikulas Patocka 58648debafeSMikulas Patocka node = (read_original_sector(wc, e) >= block ? 58748debafeSMikulas Patocka e->rb_node.rb_left : e->rb_node.rb_right); 58848debafeSMikulas Patocka if (unlikely(!node)) { 589f8011d33SMikulas Patocka if (!(flags & WFE_RETURN_FOLLOWING)) 59048debafeSMikulas Patocka return NULL; 59148debafeSMikulas Patocka if (read_original_sector(wc, e) >= block) { 592f8011d33SMikulas Patocka return e; 59348debafeSMikulas Patocka } else { 59448debafeSMikulas Patocka node = rb_next(&e->rb_node); 595f8011d33SMikulas Patocka if (unlikely(!node)) 59648debafeSMikulas Patocka return NULL; 59748debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 598f8011d33SMikulas Patocka return e; 59948debafeSMikulas Patocka } 60048debafeSMikulas Patocka } 60148debafeSMikulas Patocka } 60248debafeSMikulas Patocka 60348debafeSMikulas Patocka while (1) { 60448debafeSMikulas Patocka struct wc_entry *e2; 60548debafeSMikulas Patocka if (flags & WFE_LOWEST_SEQ) 60648debafeSMikulas Patocka node = rb_prev(&e->rb_node); 60748debafeSMikulas Patocka else 60848debafeSMikulas Patocka node = rb_next(&e->rb_node); 60984420b1eSHuaisheng Ye if (unlikely(!node)) 61048debafeSMikulas Patocka return e; 61148debafeSMikulas Patocka e2 = container_of(node, struct wc_entry, rb_node); 61248debafeSMikulas Patocka if (read_original_sector(wc, e2) != block) 61348debafeSMikulas Patocka return e; 61448debafeSMikulas Patocka e = e2; 61548debafeSMikulas Patocka } 61648debafeSMikulas Patocka } 61748debafeSMikulas Patocka 61848debafeSMikulas Patocka static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins) 61948debafeSMikulas Patocka { 62048debafeSMikulas Patocka struct wc_entry *e; 62148debafeSMikulas Patocka struct rb_node **node = &wc->tree.rb_node, *parent = NULL; 62248debafeSMikulas Patocka 62348debafeSMikulas Patocka while (*node) { 62448debafeSMikulas Patocka e = container_of(*node, struct wc_entry, rb_node); 62548debafeSMikulas Patocka parent = &e->rb_node; 62648debafeSMikulas Patocka if (read_original_sector(wc, e) > read_original_sector(wc, ins)) 62748debafeSMikulas Patocka node = &parent->rb_left; 62848debafeSMikulas Patocka else 62948debafeSMikulas Patocka node = &parent->rb_right; 63048debafeSMikulas Patocka } 63148debafeSMikulas Patocka rb_link_node(&ins->rb_node, parent, node); 63248debafeSMikulas Patocka rb_insert_color(&ins->rb_node, &wc->tree); 63348debafeSMikulas Patocka list_add(&ins->lru, &wc->lru); 6343923d485SMikulas Patocka ins->age = jiffies; 63548debafeSMikulas Patocka } 63648debafeSMikulas Patocka 63748debafeSMikulas Patocka static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e) 63848debafeSMikulas Patocka { 63948debafeSMikulas Patocka list_del(&e->lru); 64048debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->tree); 64148debafeSMikulas Patocka } 64248debafeSMikulas Patocka 64348debafeSMikulas Patocka static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e) 64448debafeSMikulas Patocka { 64548debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 64648debafeSMikulas Patocka struct rb_node **node = &wc->freetree.rb_node, *parent = NULL; 64748debafeSMikulas Patocka if (unlikely(!*node)) 64848debafeSMikulas Patocka wc->current_free = e; 64948debafeSMikulas Patocka while (*node) { 65048debafeSMikulas Patocka parent = *node; 65148debafeSMikulas Patocka if (&e->rb_node < *node) 65248debafeSMikulas Patocka node = &parent->rb_left; 65348debafeSMikulas Patocka else 65448debafeSMikulas Patocka node = &parent->rb_right; 65548debafeSMikulas Patocka } 65648debafeSMikulas Patocka rb_link_node(&e->rb_node, parent, node); 65748debafeSMikulas Patocka rb_insert_color(&e->rb_node, &wc->freetree); 65848debafeSMikulas Patocka } else { 65948debafeSMikulas Patocka list_add_tail(&e->lru, &wc->freelist); 66048debafeSMikulas Patocka } 66148debafeSMikulas Patocka wc->freelist_size++; 66248debafeSMikulas Patocka } 66348debafeSMikulas Patocka 66441c526c5SMikulas Patocka static inline void writecache_verify_watermark(struct dm_writecache *wc) 66541c526c5SMikulas Patocka { 66641c526c5SMikulas Patocka if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) 66741c526c5SMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 66841c526c5SMikulas Patocka } 66941c526c5SMikulas Patocka 6703923d485SMikulas Patocka static void writecache_max_age_timer(struct timer_list *t) 6713923d485SMikulas Patocka { 6723923d485SMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, max_age_timer); 6733923d485SMikulas Patocka 6743923d485SMikulas Patocka if (!dm_suspended(wc->ti) && !writecache_has_error(wc)) { 6753923d485SMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 6763923d485SMikulas Patocka mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 6773923d485SMikulas Patocka } 6783923d485SMikulas Patocka } 6793923d485SMikulas Patocka 680dcd19507SMikulas Patocka static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) 68148debafeSMikulas Patocka { 68248debafeSMikulas Patocka struct wc_entry *e; 68348debafeSMikulas Patocka 68448debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 68548debafeSMikulas Patocka struct rb_node *next; 68648debafeSMikulas Patocka if (unlikely(!wc->current_free)) 68748debafeSMikulas Patocka return NULL; 68848debafeSMikulas Patocka e = wc->current_free; 689dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 690dcd19507SMikulas Patocka return NULL; 69148debafeSMikulas Patocka next = rb_next(&e->rb_node); 69248debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->freetree); 69348debafeSMikulas Patocka if (unlikely(!next)) 69448debafeSMikulas Patocka next = rb_first(&wc->freetree); 69548debafeSMikulas Patocka wc->current_free = next ? container_of(next, struct wc_entry, rb_node) : NULL; 69648debafeSMikulas Patocka } else { 69748debafeSMikulas Patocka if (unlikely(list_empty(&wc->freelist))) 69848debafeSMikulas Patocka return NULL; 69948debafeSMikulas Patocka e = container_of(wc->freelist.next, struct wc_entry, lru); 700dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 701dcd19507SMikulas Patocka return NULL; 70248debafeSMikulas Patocka list_del(&e->lru); 70348debafeSMikulas Patocka } 70448debafeSMikulas Patocka wc->freelist_size--; 70541c526c5SMikulas Patocka 70641c526c5SMikulas Patocka writecache_verify_watermark(wc); 70748debafeSMikulas Patocka 70848debafeSMikulas Patocka return e; 70948debafeSMikulas Patocka } 71048debafeSMikulas Patocka 71148debafeSMikulas Patocka static void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e) 71248debafeSMikulas Patocka { 71348debafeSMikulas Patocka writecache_unlink(wc, e); 71448debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 71548debafeSMikulas Patocka clear_seq_count(wc, e); 71648debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 71748debafeSMikulas Patocka if (unlikely(waitqueue_active(&wc->freelist_wait))) 71848debafeSMikulas Patocka wake_up(&wc->freelist_wait); 71948debafeSMikulas Patocka } 72048debafeSMikulas Patocka 72148debafeSMikulas Patocka static void writecache_wait_on_freelist(struct dm_writecache *wc) 72248debafeSMikulas Patocka { 72348debafeSMikulas Patocka DEFINE_WAIT(wait); 72448debafeSMikulas Patocka 72548debafeSMikulas Patocka prepare_to_wait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE); 72648debafeSMikulas Patocka wc_unlock(wc); 72748debafeSMikulas Patocka io_schedule(); 72848debafeSMikulas Patocka finish_wait(&wc->freelist_wait, &wait); 72948debafeSMikulas Patocka wc_lock(wc); 73048debafeSMikulas Patocka } 73148debafeSMikulas Patocka 73248debafeSMikulas Patocka static void writecache_poison_lists(struct dm_writecache *wc) 73348debafeSMikulas Patocka { 73448debafeSMikulas Patocka /* 73548debafeSMikulas Patocka * Catch incorrect access to these values while the device is suspended. 73648debafeSMikulas Patocka */ 73748debafeSMikulas Patocka memset(&wc->tree, -1, sizeof wc->tree); 73848debafeSMikulas Patocka wc->lru.next = LIST_POISON1; 73948debafeSMikulas Patocka wc->lru.prev = LIST_POISON2; 74048debafeSMikulas Patocka wc->freelist.next = LIST_POISON1; 74148debafeSMikulas Patocka wc->freelist.prev = LIST_POISON2; 74248debafeSMikulas Patocka } 74348debafeSMikulas Patocka 74448debafeSMikulas Patocka static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e) 74548debafeSMikulas Patocka { 74648debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 74748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 74848debafeSMikulas Patocka writecache_flush_region(wc, memory_data(wc, e), wc->block_size); 74948debafeSMikulas Patocka } 75048debafeSMikulas Patocka 75148debafeSMikulas Patocka static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e) 75248debafeSMikulas Patocka { 75348debafeSMikulas Patocka return read_seq_count(wc, e) < wc->seq_count; 75448debafeSMikulas Patocka } 75548debafeSMikulas Patocka 75648debafeSMikulas Patocka static void writecache_flush(struct dm_writecache *wc) 75748debafeSMikulas Patocka { 75848debafeSMikulas Patocka struct wc_entry *e, *e2; 75948debafeSMikulas Patocka bool need_flush_after_free; 76048debafeSMikulas Patocka 76148debafeSMikulas Patocka wc->uncommitted_blocks = 0; 76248debafeSMikulas Patocka del_timer(&wc->autocommit_timer); 76348debafeSMikulas Patocka 76448debafeSMikulas Patocka if (list_empty(&wc->lru)) 76548debafeSMikulas Patocka return; 76648debafeSMikulas Patocka 76748debafeSMikulas Patocka e = container_of(wc->lru.next, struct wc_entry, lru); 76848debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e)) { 76948debafeSMikulas Patocka if (wc->overwrote_committed) { 77048debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 77148debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 77248debafeSMikulas Patocka wc->overwrote_committed = false; 77348debafeSMikulas Patocka } 77448debafeSMikulas Patocka return; 77548debafeSMikulas Patocka } 77648debafeSMikulas Patocka while (1) { 77748debafeSMikulas Patocka writecache_flush_entry(wc, e); 77848debafeSMikulas Patocka if (unlikely(e->lru.next == &wc->lru)) 77948debafeSMikulas Patocka break; 78048debafeSMikulas Patocka e2 = container_of(e->lru.next, struct wc_entry, lru); 78148debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e2)) 78248debafeSMikulas Patocka break; 78348debafeSMikulas Patocka e = e2; 78448debafeSMikulas Patocka cond_resched(); 78548debafeSMikulas Patocka } 786aa950920SMikulas Patocka writecache_commit_flushed(wc, true); 78748debafeSMikulas Patocka 78848debafeSMikulas Patocka wc->seq_count++; 78948debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); 790dc8a01aeSMikulas Patocka if (WC_MODE_PMEM(wc)) 791aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 792dc8a01aeSMikulas Patocka else 793dc8a01aeSMikulas Patocka ssd_commit_superblock(wc); 79448debafeSMikulas Patocka 79548debafeSMikulas Patocka wc->overwrote_committed = false; 79648debafeSMikulas Patocka 79748debafeSMikulas Patocka need_flush_after_free = false; 79848debafeSMikulas Patocka while (1) { 79948debafeSMikulas Patocka /* Free another committed entry with lower seq-count */ 80048debafeSMikulas Patocka struct rb_node *rb_node = rb_prev(&e->rb_node); 80148debafeSMikulas Patocka 80248debafeSMikulas Patocka if (rb_node) { 80348debafeSMikulas Patocka e2 = container_of(rb_node, struct wc_entry, rb_node); 80448debafeSMikulas Patocka if (read_original_sector(wc, e2) == read_original_sector(wc, e) && 80548debafeSMikulas Patocka likely(!e2->write_in_progress)) { 80648debafeSMikulas Patocka writecache_free_entry(wc, e2); 80748debafeSMikulas Patocka need_flush_after_free = true; 80848debafeSMikulas Patocka } 80948debafeSMikulas Patocka } 81048debafeSMikulas Patocka if (unlikely(e->lru.prev == &wc->lru)) 81148debafeSMikulas Patocka break; 81248debafeSMikulas Patocka e = container_of(e->lru.prev, struct wc_entry, lru); 81348debafeSMikulas Patocka cond_resched(); 81448debafeSMikulas Patocka } 81548debafeSMikulas Patocka 81648debafeSMikulas Patocka if (need_flush_after_free) 817aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 81848debafeSMikulas Patocka } 81948debafeSMikulas Patocka 82048debafeSMikulas Patocka static void writecache_flush_work(struct work_struct *work) 82148debafeSMikulas Patocka { 82248debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work); 82348debafeSMikulas Patocka 82448debafeSMikulas Patocka wc_lock(wc); 82548debafeSMikulas Patocka writecache_flush(wc); 82648debafeSMikulas Patocka wc_unlock(wc); 82748debafeSMikulas Patocka } 82848debafeSMikulas Patocka 82948debafeSMikulas Patocka static void writecache_autocommit_timer(struct timer_list *t) 83048debafeSMikulas Patocka { 83148debafeSMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, autocommit_timer); 83248debafeSMikulas Patocka if (!writecache_has_error(wc)) 83348debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 83448debafeSMikulas Patocka } 83548debafeSMikulas Patocka 83648debafeSMikulas Patocka static void writecache_schedule_autocommit(struct dm_writecache *wc) 83748debafeSMikulas Patocka { 83848debafeSMikulas Patocka if (!timer_pending(&wc->autocommit_timer)) 83948debafeSMikulas Patocka mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies); 84048debafeSMikulas Patocka } 84148debafeSMikulas Patocka 84248debafeSMikulas Patocka static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end) 84348debafeSMikulas Patocka { 84448debafeSMikulas Patocka struct wc_entry *e; 84548debafeSMikulas Patocka bool discarded_something = false; 84648debafeSMikulas Patocka 84748debafeSMikulas Patocka e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ); 84848debafeSMikulas Patocka if (unlikely(!e)) 84948debafeSMikulas Patocka return; 85048debafeSMikulas Patocka 85148debafeSMikulas Patocka while (read_original_sector(wc, e) < end) { 85248debafeSMikulas Patocka struct rb_node *node = rb_next(&e->rb_node); 85348debafeSMikulas Patocka 85448debafeSMikulas Patocka if (likely(!e->write_in_progress)) { 85548debafeSMikulas Patocka if (!discarded_something) { 85648debafeSMikulas Patocka writecache_wait_for_ios(wc, READ); 85748debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 85848debafeSMikulas Patocka discarded_something = true; 85948debafeSMikulas Patocka } 86048debafeSMikulas Patocka writecache_free_entry(wc, e); 86148debafeSMikulas Patocka } 86248debafeSMikulas Patocka 86384420b1eSHuaisheng Ye if (unlikely(!node)) 86448debafeSMikulas Patocka break; 86548debafeSMikulas Patocka 86648debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 86748debafeSMikulas Patocka } 86848debafeSMikulas Patocka 86948debafeSMikulas Patocka if (discarded_something) 870aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 87148debafeSMikulas Patocka } 87248debafeSMikulas Patocka 87348debafeSMikulas Patocka static bool writecache_wait_for_writeback(struct dm_writecache *wc) 87448debafeSMikulas Patocka { 87548debafeSMikulas Patocka if (wc->writeback_size) { 87648debafeSMikulas Patocka writecache_wait_on_freelist(wc); 87748debafeSMikulas Patocka return true; 87848debafeSMikulas Patocka } 87948debafeSMikulas Patocka return false; 88048debafeSMikulas Patocka } 88148debafeSMikulas Patocka 88248debafeSMikulas Patocka static void writecache_suspend(struct dm_target *ti) 88348debafeSMikulas Patocka { 88448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 88548debafeSMikulas Patocka bool flush_on_suspend; 88648debafeSMikulas Patocka 88748debafeSMikulas Patocka del_timer_sync(&wc->autocommit_timer); 8883923d485SMikulas Patocka del_timer_sync(&wc->max_age_timer); 88948debafeSMikulas Patocka 89048debafeSMikulas Patocka wc_lock(wc); 89148debafeSMikulas Patocka writecache_flush(wc); 89248debafeSMikulas Patocka flush_on_suspend = wc->flush_on_suspend; 89348debafeSMikulas Patocka if (flush_on_suspend) { 89448debafeSMikulas Patocka wc->flush_on_suspend = false; 89548debafeSMikulas Patocka wc->writeback_all++; 89648debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 89748debafeSMikulas Patocka } 89848debafeSMikulas Patocka wc_unlock(wc); 89948debafeSMikulas Patocka 900adc0daadSMikulas Patocka drain_workqueue(wc->writeback_wq); 90148debafeSMikulas Patocka 90248debafeSMikulas Patocka wc_lock(wc); 90348debafeSMikulas Patocka if (flush_on_suspend) 90448debafeSMikulas Patocka wc->writeback_all--; 90548debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 90648debafeSMikulas Patocka 90748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 90848debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 90948debafeSMikulas Patocka 91048debafeSMikulas Patocka writecache_poison_lists(wc); 91148debafeSMikulas Patocka 91248debafeSMikulas Patocka wc_unlock(wc); 91348debafeSMikulas Patocka } 91448debafeSMikulas Patocka 91548debafeSMikulas Patocka static int writecache_alloc_entries(struct dm_writecache *wc) 91648debafeSMikulas Patocka { 91748debafeSMikulas Patocka size_t b; 91848debafeSMikulas Patocka 91948debafeSMikulas Patocka if (wc->entries) 92048debafeSMikulas Patocka return 0; 92150a7d3baSKees Cook wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); 92248debafeSMikulas Patocka if (!wc->entries) 92348debafeSMikulas Patocka return -ENOMEM; 92448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 92548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 92648debafeSMikulas Patocka e->index = b; 92748debafeSMikulas Patocka e->write_in_progress = false; 928*1edaa447SMikulas Patocka cond_resched(); 92948debafeSMikulas Patocka } 93048debafeSMikulas Patocka 93148debafeSMikulas Patocka return 0; 93248debafeSMikulas Patocka } 93348debafeSMikulas Patocka 93448debafeSMikulas Patocka static void writecache_resume(struct dm_target *ti) 93548debafeSMikulas Patocka { 93648debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 93748debafeSMikulas Patocka size_t b; 93848debafeSMikulas Patocka bool need_flush = false; 93948debafeSMikulas Patocka __le64 sb_seq_count; 94048debafeSMikulas Patocka int r; 94148debafeSMikulas Patocka 94248debafeSMikulas Patocka wc_lock(wc); 94348debafeSMikulas Patocka 94448debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 94548debafeSMikulas Patocka persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); 94648debafeSMikulas Patocka 94748debafeSMikulas Patocka wc->tree = RB_ROOT; 94848debafeSMikulas Patocka INIT_LIST_HEAD(&wc->lru); 94948debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 95048debafeSMikulas Patocka wc->freetree = RB_ROOT; 95148debafeSMikulas Patocka wc->current_free = NULL; 95248debafeSMikulas Patocka } else { 95348debafeSMikulas Patocka INIT_LIST_HEAD(&wc->freelist); 95448debafeSMikulas Patocka } 95548debafeSMikulas Patocka wc->freelist_size = 0; 95648debafeSMikulas Patocka 95748debafeSMikulas Patocka r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); 95848debafeSMikulas Patocka if (r) { 95948debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); 96048debafeSMikulas Patocka sb_seq_count = cpu_to_le64(0); 96148debafeSMikulas Patocka } 96248debafeSMikulas Patocka wc->seq_count = le64_to_cpu(sb_seq_count); 96348debafeSMikulas Patocka 96448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 96548debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 96648debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 96748debafeSMikulas Patocka struct wc_memory_entry wme; 96848debafeSMikulas Patocka if (writecache_has_error(wc)) { 96948debafeSMikulas Patocka e->original_sector = -1; 97048debafeSMikulas Patocka e->seq_count = -1; 97148debafeSMikulas Patocka continue; 97248debafeSMikulas Patocka } 97348debafeSMikulas Patocka r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 97448debafeSMikulas Patocka if (r) { 97548debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", 97648debafeSMikulas Patocka (unsigned long)b, r); 97748debafeSMikulas Patocka e->original_sector = -1; 97848debafeSMikulas Patocka e->seq_count = -1; 97948debafeSMikulas Patocka } else { 98048debafeSMikulas Patocka e->original_sector = le64_to_cpu(wme.original_sector); 98148debafeSMikulas Patocka e->seq_count = le64_to_cpu(wme.seq_count); 98248debafeSMikulas Patocka } 983*1edaa447SMikulas Patocka cond_resched(); 98448debafeSMikulas Patocka } 98548debafeSMikulas Patocka #endif 98648debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 98748debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 98848debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) { 98948debafeSMikulas Patocka if (read_seq_count(wc, e) != -1) { 99048debafeSMikulas Patocka erase_this: 99148debafeSMikulas Patocka clear_seq_count(wc, e); 99248debafeSMikulas Patocka need_flush = true; 99348debafeSMikulas Patocka } 99448debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 99548debafeSMikulas Patocka } else { 99648debafeSMikulas Patocka struct wc_entry *old; 99748debafeSMikulas Patocka 99848debafeSMikulas Patocka old = writecache_find_entry(wc, read_original_sector(wc, e), 0); 99948debafeSMikulas Patocka if (!old) { 100048debafeSMikulas Patocka writecache_insert_entry(wc, e); 100148debafeSMikulas Patocka } else { 100248debafeSMikulas Patocka if (read_seq_count(wc, old) == read_seq_count(wc, e)) { 100348debafeSMikulas Patocka writecache_error(wc, -EINVAL, 100448debafeSMikulas Patocka "two identical entries, position %llu, sector %llu, sequence %llu", 100548debafeSMikulas Patocka (unsigned long long)b, (unsigned long long)read_original_sector(wc, e), 100648debafeSMikulas Patocka (unsigned long long)read_seq_count(wc, e)); 100748debafeSMikulas Patocka } 100848debafeSMikulas Patocka if (read_seq_count(wc, old) > read_seq_count(wc, e)) { 100948debafeSMikulas Patocka goto erase_this; 101048debafeSMikulas Patocka } else { 101148debafeSMikulas Patocka writecache_free_entry(wc, old); 101248debafeSMikulas Patocka writecache_insert_entry(wc, e); 101348debafeSMikulas Patocka need_flush = true; 101448debafeSMikulas Patocka } 101548debafeSMikulas Patocka } 101648debafeSMikulas Patocka } 101748debafeSMikulas Patocka cond_resched(); 101848debafeSMikulas Patocka } 101948debafeSMikulas Patocka 102048debafeSMikulas Patocka if (need_flush) { 102148debafeSMikulas Patocka writecache_flush_all_metadata(wc); 1022aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 102348debafeSMikulas Patocka } 102448debafeSMikulas Patocka 102541c526c5SMikulas Patocka writecache_verify_watermark(wc); 102641c526c5SMikulas Patocka 10273923d485SMikulas Patocka if (wc->max_age != MAX_AGE_UNSPECIFIED) 10283923d485SMikulas Patocka mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 10293923d485SMikulas Patocka 103048debafeSMikulas Patocka wc_unlock(wc); 103148debafeSMikulas Patocka } 103248debafeSMikulas Patocka 103348debafeSMikulas Patocka static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 103448debafeSMikulas Patocka { 103548debafeSMikulas Patocka if (argc != 1) 103648debafeSMikulas Patocka return -EINVAL; 103748debafeSMikulas Patocka 103848debafeSMikulas Patocka wc_lock(wc); 103948debafeSMikulas Patocka if (dm_suspended(wc->ti)) { 104048debafeSMikulas Patocka wc_unlock(wc); 104148debafeSMikulas Patocka return -EBUSY; 104248debafeSMikulas Patocka } 104348debafeSMikulas Patocka if (writecache_has_error(wc)) { 104448debafeSMikulas Patocka wc_unlock(wc); 104548debafeSMikulas Patocka return -EIO; 104648debafeSMikulas Patocka } 104748debafeSMikulas Patocka 104848debafeSMikulas Patocka writecache_flush(wc); 104948debafeSMikulas Patocka wc->writeback_all++; 105048debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 105148debafeSMikulas Patocka wc_unlock(wc); 105248debafeSMikulas Patocka 105348debafeSMikulas Patocka flush_workqueue(wc->writeback_wq); 105448debafeSMikulas Patocka 105548debafeSMikulas Patocka wc_lock(wc); 105648debafeSMikulas Patocka wc->writeback_all--; 105748debafeSMikulas Patocka if (writecache_has_error(wc)) { 105848debafeSMikulas Patocka wc_unlock(wc); 105948debafeSMikulas Patocka return -EIO; 106048debafeSMikulas Patocka } 106148debafeSMikulas Patocka wc_unlock(wc); 106248debafeSMikulas Patocka 106348debafeSMikulas Patocka return 0; 106448debafeSMikulas Patocka } 106548debafeSMikulas Patocka 106648debafeSMikulas Patocka static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 106748debafeSMikulas Patocka { 106848debafeSMikulas Patocka if (argc != 1) 106948debafeSMikulas Patocka return -EINVAL; 107048debafeSMikulas Patocka 107148debafeSMikulas Patocka wc_lock(wc); 107248debafeSMikulas Patocka wc->flush_on_suspend = true; 107348debafeSMikulas Patocka wc_unlock(wc); 107448debafeSMikulas Patocka 107548debafeSMikulas Patocka return 0; 107648debafeSMikulas Patocka } 107748debafeSMikulas Patocka 107893de44ebSMikulas Patocka static void activate_cleaner(struct dm_writecache *wc) 107993de44ebSMikulas Patocka { 108093de44ebSMikulas Patocka wc->flush_on_suspend = true; 108193de44ebSMikulas Patocka wc->cleaner = true; 108293de44ebSMikulas Patocka wc->freelist_high_watermark = wc->n_blocks; 108393de44ebSMikulas Patocka wc->freelist_low_watermark = wc->n_blocks; 108493de44ebSMikulas Patocka } 108593de44ebSMikulas Patocka 108693de44ebSMikulas Patocka static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 108793de44ebSMikulas Patocka { 108893de44ebSMikulas Patocka if (argc != 1) 108993de44ebSMikulas Patocka return -EINVAL; 109093de44ebSMikulas Patocka 109193de44ebSMikulas Patocka wc_lock(wc); 109293de44ebSMikulas Patocka activate_cleaner(wc); 109393de44ebSMikulas Patocka if (!dm_suspended(wc->ti)) 109493de44ebSMikulas Patocka writecache_verify_watermark(wc); 109593de44ebSMikulas Patocka wc_unlock(wc); 109693de44ebSMikulas Patocka 109793de44ebSMikulas Patocka return 0; 109893de44ebSMikulas Patocka } 109993de44ebSMikulas Patocka 110048debafeSMikulas Patocka static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, 110148debafeSMikulas Patocka char *result, unsigned maxlen) 110248debafeSMikulas Patocka { 110348debafeSMikulas Patocka int r = -EINVAL; 110448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 110548debafeSMikulas Patocka 110648debafeSMikulas Patocka if (!strcasecmp(argv[0], "flush")) 110748debafeSMikulas Patocka r = process_flush_mesg(argc, argv, wc); 110848debafeSMikulas Patocka else if (!strcasecmp(argv[0], "flush_on_suspend")) 110948debafeSMikulas Patocka r = process_flush_on_suspend_mesg(argc, argv, wc); 111093de44ebSMikulas Patocka else if (!strcasecmp(argv[0], "cleaner")) 111193de44ebSMikulas Patocka r = process_cleaner_mesg(argc, argv, wc); 111248debafeSMikulas Patocka else 111348debafeSMikulas Patocka DMERR("unrecognised message received: %s", argv[0]); 111448debafeSMikulas Patocka 111548debafeSMikulas Patocka return r; 111648debafeSMikulas Patocka } 111748debafeSMikulas Patocka 111848debafeSMikulas Patocka static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) 111948debafeSMikulas Patocka { 112048debafeSMikulas Patocka void *buf; 112148debafeSMikulas Patocka unsigned long flags; 112248debafeSMikulas Patocka unsigned size; 112348debafeSMikulas Patocka int rw = bio_data_dir(bio); 112448debafeSMikulas Patocka unsigned remaining_size = wc->block_size; 112548debafeSMikulas Patocka 112648debafeSMikulas Patocka do { 112748debafeSMikulas Patocka struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); 112848debafeSMikulas Patocka buf = bvec_kmap_irq(&bv, &flags); 112948debafeSMikulas Patocka size = bv.bv_len; 113048debafeSMikulas Patocka if (unlikely(size > remaining_size)) 113148debafeSMikulas Patocka size = remaining_size; 113248debafeSMikulas Patocka 113348debafeSMikulas Patocka if (rw == READ) { 113448debafeSMikulas Patocka int r; 113548debafeSMikulas Patocka r = memcpy_mcsafe(buf, data, size); 113648debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 113748debafeSMikulas Patocka if (unlikely(r)) { 113848debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading data: %d", r); 113948debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 114048debafeSMikulas Patocka } 114148debafeSMikulas Patocka } else { 114248debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 114348debafeSMikulas Patocka memcpy_flushcache(data, buf, size); 114448debafeSMikulas Patocka } 114548debafeSMikulas Patocka 114648debafeSMikulas Patocka bvec_kunmap_irq(buf, &flags); 114748debafeSMikulas Patocka 114848debafeSMikulas Patocka data = (char *)data + size; 114948debafeSMikulas Patocka remaining_size -= size; 115048debafeSMikulas Patocka bio_advance(bio, size); 115148debafeSMikulas Patocka } while (unlikely(remaining_size)); 115248debafeSMikulas Patocka } 115348debafeSMikulas Patocka 115448debafeSMikulas Patocka static int writecache_flush_thread(void *data) 115548debafeSMikulas Patocka { 115648debafeSMikulas Patocka struct dm_writecache *wc = data; 115748debafeSMikulas Patocka 115848debafeSMikulas Patocka while (1) { 115948debafeSMikulas Patocka struct bio *bio; 116048debafeSMikulas Patocka 116148debafeSMikulas Patocka wc_lock(wc); 116248debafeSMikulas Patocka bio = bio_list_pop(&wc->flush_list); 116348debafeSMikulas Patocka if (!bio) { 116448debafeSMikulas Patocka set_current_state(TASK_INTERRUPTIBLE); 116548debafeSMikulas Patocka wc_unlock(wc); 116648debafeSMikulas Patocka 116748debafeSMikulas Patocka if (unlikely(kthread_should_stop())) { 116848debafeSMikulas Patocka set_current_state(TASK_RUNNING); 116948debafeSMikulas Patocka break; 117048debafeSMikulas Patocka } 117148debafeSMikulas Patocka 117248debafeSMikulas Patocka schedule(); 117348debafeSMikulas Patocka continue; 117448debafeSMikulas Patocka } 117548debafeSMikulas Patocka 117648debafeSMikulas Patocka if (bio_op(bio) == REQ_OP_DISCARD) { 117748debafeSMikulas Patocka writecache_discard(wc, bio->bi_iter.bi_sector, 117848debafeSMikulas Patocka bio_end_sector(bio)); 117948debafeSMikulas Patocka wc_unlock(wc); 118048debafeSMikulas Patocka bio_set_dev(bio, wc->dev->bdev); 118148debafeSMikulas Patocka generic_make_request(bio); 118248debafeSMikulas Patocka } else { 118348debafeSMikulas Patocka writecache_flush(wc); 118448debafeSMikulas Patocka wc_unlock(wc); 118548debafeSMikulas Patocka if (writecache_has_error(wc)) 118648debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 118748debafeSMikulas Patocka bio_endio(bio); 118848debafeSMikulas Patocka } 118948debafeSMikulas Patocka } 119048debafeSMikulas Patocka 119148debafeSMikulas Patocka return 0; 119248debafeSMikulas Patocka } 119348debafeSMikulas Patocka 119448debafeSMikulas Patocka static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) 119548debafeSMikulas Patocka { 119648debafeSMikulas Patocka if (bio_list_empty(&wc->flush_list)) 119748debafeSMikulas Patocka wake_up_process(wc->flush_thread); 119848debafeSMikulas Patocka bio_list_add(&wc->flush_list, bio); 119948debafeSMikulas Patocka } 120048debafeSMikulas Patocka 120148debafeSMikulas Patocka static int writecache_map(struct dm_target *ti, struct bio *bio) 120248debafeSMikulas Patocka { 120348debafeSMikulas Patocka struct wc_entry *e; 120448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 120548debafeSMikulas Patocka 120648debafeSMikulas Patocka bio->bi_private = NULL; 120748debafeSMikulas Patocka 120848debafeSMikulas Patocka wc_lock(wc); 120948debafeSMikulas Patocka 121048debafeSMikulas Patocka if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 121148debafeSMikulas Patocka if (writecache_has_error(wc)) 121248debafeSMikulas Patocka goto unlock_error; 121348debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 121448debafeSMikulas Patocka writecache_flush(wc); 121548debafeSMikulas Patocka if (writecache_has_error(wc)) 121648debafeSMikulas Patocka goto unlock_error; 121748debafeSMikulas Patocka goto unlock_submit; 121848debafeSMikulas Patocka } else { 121948debafeSMikulas Patocka writecache_offload_bio(wc, bio); 122048debafeSMikulas Patocka goto unlock_return; 122148debafeSMikulas Patocka } 122248debafeSMikulas Patocka } 122348debafeSMikulas Patocka 122448debafeSMikulas Patocka bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 122548debafeSMikulas Patocka 122648debafeSMikulas Patocka if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & 122748debafeSMikulas Patocka (wc->block_size / 512 - 1)) != 0)) { 122848debafeSMikulas Patocka DMERR("I/O is not aligned, sector %llu, size %u, block size %u", 122948debafeSMikulas Patocka (unsigned long long)bio->bi_iter.bi_sector, 123048debafeSMikulas Patocka bio->bi_iter.bi_size, wc->block_size); 123148debafeSMikulas Patocka goto unlock_error; 123248debafeSMikulas Patocka } 123348debafeSMikulas Patocka 123448debafeSMikulas Patocka if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { 123548debafeSMikulas Patocka if (writecache_has_error(wc)) 123648debafeSMikulas Patocka goto unlock_error; 123748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 123848debafeSMikulas Patocka writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); 123948debafeSMikulas Patocka goto unlock_remap_origin; 124048debafeSMikulas Patocka } else { 124148debafeSMikulas Patocka writecache_offload_bio(wc, bio); 124248debafeSMikulas Patocka goto unlock_return; 124348debafeSMikulas Patocka } 124448debafeSMikulas Patocka } 124548debafeSMikulas Patocka 124648debafeSMikulas Patocka if (bio_data_dir(bio) == READ) { 124748debafeSMikulas Patocka read_next_block: 124848debafeSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 124948debafeSMikulas Patocka if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { 125048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 125148debafeSMikulas Patocka bio_copy_block(wc, bio, memory_data(wc, e)); 125248debafeSMikulas Patocka if (bio->bi_iter.bi_size) 125348debafeSMikulas Patocka goto read_next_block; 125448debafeSMikulas Patocka goto unlock_submit; 125548debafeSMikulas Patocka } else { 125648debafeSMikulas Patocka dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); 125748debafeSMikulas Patocka bio_set_dev(bio, wc->ssd_dev->bdev); 125848debafeSMikulas Patocka bio->bi_iter.bi_sector = cache_sector(wc, e); 125948debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) 126048debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 126148debafeSMikulas Patocka goto unlock_remap; 126248debafeSMikulas Patocka } 126348debafeSMikulas Patocka } else { 126448debafeSMikulas Patocka if (e) { 126548debafeSMikulas Patocka sector_t next_boundary = 126648debafeSMikulas Patocka read_original_sector(wc, e) - bio->bi_iter.bi_sector; 126748debafeSMikulas Patocka if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { 126848debafeSMikulas Patocka dm_accept_partial_bio(bio, next_boundary); 126948debafeSMikulas Patocka } 127048debafeSMikulas Patocka } 127148debafeSMikulas Patocka goto unlock_remap_origin; 127248debafeSMikulas Patocka } 127348debafeSMikulas Patocka } else { 127448debafeSMikulas Patocka do { 1275d53f1fafSMikulas Patocka bool found_entry = false; 127648debafeSMikulas Patocka if (writecache_has_error(wc)) 127748debafeSMikulas Patocka goto unlock_error; 127848debafeSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); 127948debafeSMikulas Patocka if (e) { 128048debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) 128148debafeSMikulas Patocka goto bio_copy; 128248debafeSMikulas Patocka if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { 128348debafeSMikulas Patocka wc->overwrote_committed = true; 128448debafeSMikulas Patocka goto bio_copy; 128548debafeSMikulas Patocka } 1286d53f1fafSMikulas Patocka found_entry = true; 128793de44ebSMikulas Patocka } else { 128893de44ebSMikulas Patocka if (unlikely(wc->cleaner)) 128993de44ebSMikulas Patocka goto direct_write; 129048debafeSMikulas Patocka } 1291dcd19507SMikulas Patocka e = writecache_pop_from_freelist(wc, (sector_t)-1); 129248debafeSMikulas Patocka if (unlikely(!e)) { 1293d53f1fafSMikulas Patocka if (!found_entry) { 129493de44ebSMikulas Patocka direct_write: 1295d53f1fafSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 1296d53f1fafSMikulas Patocka if (e) { 1297d53f1fafSMikulas Patocka sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; 1298d53f1fafSMikulas Patocka BUG_ON(!next_boundary); 1299d53f1fafSMikulas Patocka if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { 1300d53f1fafSMikulas Patocka dm_accept_partial_bio(bio, next_boundary); 1301d53f1fafSMikulas Patocka } 1302d53f1fafSMikulas Patocka } 1303d53f1fafSMikulas Patocka goto unlock_remap_origin; 1304d53f1fafSMikulas Patocka } 130548debafeSMikulas Patocka writecache_wait_on_freelist(wc); 130648debafeSMikulas Patocka continue; 130748debafeSMikulas Patocka } 130848debafeSMikulas Patocka write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); 130948debafeSMikulas Patocka writecache_insert_entry(wc, e); 131048debafeSMikulas Patocka wc->uncommitted_blocks++; 131148debafeSMikulas Patocka bio_copy: 131248debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 131348debafeSMikulas Patocka bio_copy_block(wc, bio, memory_data(wc, e)); 131448debafeSMikulas Patocka } else { 1315dcd19507SMikulas Patocka unsigned bio_size = wc->block_size; 1316dcd19507SMikulas Patocka sector_t start_cache_sec = cache_sector(wc, e); 1317dcd19507SMikulas Patocka sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); 1318dcd19507SMikulas Patocka 1319dcd19507SMikulas Patocka while (bio_size < bio->bi_iter.bi_size) { 1320dcd19507SMikulas Patocka struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); 1321dcd19507SMikulas Patocka if (!f) 1322dcd19507SMikulas Patocka break; 1323dcd19507SMikulas Patocka write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + 1324dcd19507SMikulas Patocka (bio_size >> SECTOR_SHIFT), wc->seq_count); 1325dcd19507SMikulas Patocka writecache_insert_entry(wc, f); 1326dcd19507SMikulas Patocka wc->uncommitted_blocks++; 1327dcd19507SMikulas Patocka bio_size += wc->block_size; 1328dcd19507SMikulas Patocka current_cache_sec += wc->block_size >> SECTOR_SHIFT; 1329dcd19507SMikulas Patocka } 1330dcd19507SMikulas Patocka 133148debafeSMikulas Patocka bio_set_dev(bio, wc->ssd_dev->bdev); 1332dcd19507SMikulas Patocka bio->bi_iter.bi_sector = start_cache_sec; 1333dcd19507SMikulas Patocka dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); 1334dcd19507SMikulas Patocka 133548debafeSMikulas Patocka if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { 133648debafeSMikulas Patocka wc->uncommitted_blocks = 0; 133748debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 133848debafeSMikulas Patocka } else { 133948debafeSMikulas Patocka writecache_schedule_autocommit(wc); 134048debafeSMikulas Patocka } 134148debafeSMikulas Patocka goto unlock_remap; 134248debafeSMikulas Patocka } 134348debafeSMikulas Patocka } while (bio->bi_iter.bi_size); 134448debafeSMikulas Patocka 1345c1005322SMaged Mokhtar if (unlikely(bio->bi_opf & REQ_FUA || 1346c1005322SMaged Mokhtar wc->uncommitted_blocks >= wc->autocommit_blocks)) 134748debafeSMikulas Patocka writecache_flush(wc); 134848debafeSMikulas Patocka else 134948debafeSMikulas Patocka writecache_schedule_autocommit(wc); 135048debafeSMikulas Patocka goto unlock_submit; 135148debafeSMikulas Patocka } 135248debafeSMikulas Patocka 135348debafeSMikulas Patocka unlock_remap_origin: 135448debafeSMikulas Patocka bio_set_dev(bio, wc->dev->bdev); 135548debafeSMikulas Patocka wc_unlock(wc); 135648debafeSMikulas Patocka return DM_MAPIO_REMAPPED; 135748debafeSMikulas Patocka 135848debafeSMikulas Patocka unlock_remap: 135948debafeSMikulas Patocka /* make sure that writecache_end_io decrements bio_in_progress: */ 136048debafeSMikulas Patocka bio->bi_private = (void *)1; 136148debafeSMikulas Patocka atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); 136248debafeSMikulas Patocka wc_unlock(wc); 136348debafeSMikulas Patocka return DM_MAPIO_REMAPPED; 136448debafeSMikulas Patocka 136548debafeSMikulas Patocka unlock_submit: 136648debafeSMikulas Patocka wc_unlock(wc); 136748debafeSMikulas Patocka bio_endio(bio); 136848debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 136948debafeSMikulas Patocka 137048debafeSMikulas Patocka unlock_return: 137148debafeSMikulas Patocka wc_unlock(wc); 137248debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 137348debafeSMikulas Patocka 137448debafeSMikulas Patocka unlock_error: 137548debafeSMikulas Patocka wc_unlock(wc); 137648debafeSMikulas Patocka bio_io_error(bio); 137748debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 137848debafeSMikulas Patocka } 137948debafeSMikulas Patocka 138048debafeSMikulas Patocka static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status) 138148debafeSMikulas Patocka { 138248debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 138348debafeSMikulas Patocka 138448debafeSMikulas Patocka if (bio->bi_private != NULL) { 138548debafeSMikulas Patocka int dir = bio_data_dir(bio); 138648debafeSMikulas Patocka if (atomic_dec_and_test(&wc->bio_in_progress[dir])) 138748debafeSMikulas Patocka if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir]))) 138848debafeSMikulas Patocka wake_up(&wc->bio_in_progress_wait[dir]); 138948debafeSMikulas Patocka } 139048debafeSMikulas Patocka return 0; 139148debafeSMikulas Patocka } 139248debafeSMikulas Patocka 139348debafeSMikulas Patocka static int writecache_iterate_devices(struct dm_target *ti, 139448debafeSMikulas Patocka iterate_devices_callout_fn fn, void *data) 139548debafeSMikulas Patocka { 139648debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 139748debafeSMikulas Patocka 139848debafeSMikulas Patocka return fn(ti, wc->dev, 0, ti->len, data); 139948debafeSMikulas Patocka } 140048debafeSMikulas Patocka 140148debafeSMikulas Patocka static void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits) 140248debafeSMikulas Patocka { 140348debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 140448debafeSMikulas Patocka 140548debafeSMikulas Patocka if (limits->logical_block_size < wc->block_size) 140648debafeSMikulas Patocka limits->logical_block_size = wc->block_size; 140748debafeSMikulas Patocka 140848debafeSMikulas Patocka if (limits->physical_block_size < wc->block_size) 140948debafeSMikulas Patocka limits->physical_block_size = wc->block_size; 141048debafeSMikulas Patocka 141148debafeSMikulas Patocka if (limits->io_min < wc->block_size) 141248debafeSMikulas Patocka limits->io_min = wc->block_size; 141348debafeSMikulas Patocka } 141448debafeSMikulas Patocka 141548debafeSMikulas Patocka 141648debafeSMikulas Patocka static void writecache_writeback_endio(struct bio *bio) 141748debafeSMikulas Patocka { 141848debafeSMikulas Patocka struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio); 141948debafeSMikulas Patocka struct dm_writecache *wc = wb->wc; 142048debafeSMikulas Patocka unsigned long flags; 142148debafeSMikulas Patocka 142248debafeSMikulas Patocka raw_spin_lock_irqsave(&wc->endio_list_lock, flags); 142348debafeSMikulas Patocka if (unlikely(list_empty(&wc->endio_list))) 142448debafeSMikulas Patocka wake_up_process(wc->endio_thread); 142548debafeSMikulas Patocka list_add_tail(&wb->endio_entry, &wc->endio_list); 142648debafeSMikulas Patocka raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags); 142748debafeSMikulas Patocka } 142848debafeSMikulas Patocka 142948debafeSMikulas Patocka static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr) 143048debafeSMikulas Patocka { 143148debafeSMikulas Patocka struct copy_struct *c = ptr; 143248debafeSMikulas Patocka struct dm_writecache *wc = c->wc; 143348debafeSMikulas Patocka 143448debafeSMikulas Patocka c->error = likely(!(read_err | write_err)) ? 0 : -EIO; 143548debafeSMikulas Patocka 143648debafeSMikulas Patocka raw_spin_lock_irq(&wc->endio_list_lock); 143748debafeSMikulas Patocka if (unlikely(list_empty(&wc->endio_list))) 143848debafeSMikulas Patocka wake_up_process(wc->endio_thread); 143948debafeSMikulas Patocka list_add_tail(&c->endio_entry, &wc->endio_list); 144048debafeSMikulas Patocka raw_spin_unlock_irq(&wc->endio_list_lock); 144148debafeSMikulas Patocka } 144248debafeSMikulas Patocka 144348debafeSMikulas Patocka static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list) 144448debafeSMikulas Patocka { 144548debafeSMikulas Patocka unsigned i; 144648debafeSMikulas Patocka struct writeback_struct *wb; 144748debafeSMikulas Patocka struct wc_entry *e; 144848debafeSMikulas Patocka unsigned long n_walked = 0; 144948debafeSMikulas Patocka 145048debafeSMikulas Patocka do { 145148debafeSMikulas Patocka wb = list_entry(list->next, struct writeback_struct, endio_entry); 145248debafeSMikulas Patocka list_del(&wb->endio_entry); 145348debafeSMikulas Patocka 145448debafeSMikulas Patocka if (unlikely(wb->bio.bi_status != BLK_STS_OK)) 145548debafeSMikulas Patocka writecache_error(wc, blk_status_to_errno(wb->bio.bi_status), 145648debafeSMikulas Patocka "write error %d", wb->bio.bi_status); 145748debafeSMikulas Patocka i = 0; 145848debafeSMikulas Patocka do { 145948debafeSMikulas Patocka e = wb->wc_list[i]; 146048debafeSMikulas Patocka BUG_ON(!e->write_in_progress); 146148debafeSMikulas Patocka e->write_in_progress = false; 146248debafeSMikulas Patocka INIT_LIST_HEAD(&e->lru); 146348debafeSMikulas Patocka if (!writecache_has_error(wc)) 146448debafeSMikulas Patocka writecache_free_entry(wc, e); 146548debafeSMikulas Patocka BUG_ON(!wc->writeback_size); 146648debafeSMikulas Patocka wc->writeback_size--; 146748debafeSMikulas Patocka n_walked++; 146848debafeSMikulas Patocka if (unlikely(n_walked >= ENDIO_LATENCY)) { 1469aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 147048debafeSMikulas Patocka wc_unlock(wc); 147148debafeSMikulas Patocka wc_lock(wc); 147248debafeSMikulas Patocka n_walked = 0; 147348debafeSMikulas Patocka } 147448debafeSMikulas Patocka } while (++i < wb->wc_list_n); 147548debafeSMikulas Patocka 147648debafeSMikulas Patocka if (wb->wc_list != wb->wc_list_inline) 147748debafeSMikulas Patocka kfree(wb->wc_list); 147848debafeSMikulas Patocka bio_put(&wb->bio); 147948debafeSMikulas Patocka } while (!list_empty(list)); 148048debafeSMikulas Patocka } 148148debafeSMikulas Patocka 148248debafeSMikulas Patocka static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list) 148348debafeSMikulas Patocka { 148448debafeSMikulas Patocka struct copy_struct *c; 148548debafeSMikulas Patocka struct wc_entry *e; 148648debafeSMikulas Patocka 148748debafeSMikulas Patocka do { 148848debafeSMikulas Patocka c = list_entry(list->next, struct copy_struct, endio_entry); 148948debafeSMikulas Patocka list_del(&c->endio_entry); 149048debafeSMikulas Patocka 149148debafeSMikulas Patocka if (unlikely(c->error)) 149248debafeSMikulas Patocka writecache_error(wc, c->error, "copy error"); 149348debafeSMikulas Patocka 149448debafeSMikulas Patocka e = c->e; 149548debafeSMikulas Patocka do { 149648debafeSMikulas Patocka BUG_ON(!e->write_in_progress); 149748debafeSMikulas Patocka e->write_in_progress = false; 149848debafeSMikulas Patocka INIT_LIST_HEAD(&e->lru); 149948debafeSMikulas Patocka if (!writecache_has_error(wc)) 150048debafeSMikulas Patocka writecache_free_entry(wc, e); 150148debafeSMikulas Patocka 150248debafeSMikulas Patocka BUG_ON(!wc->writeback_size); 150348debafeSMikulas Patocka wc->writeback_size--; 150448debafeSMikulas Patocka e++; 150548debafeSMikulas Patocka } while (--c->n_entries); 150648debafeSMikulas Patocka mempool_free(c, &wc->copy_pool); 150748debafeSMikulas Patocka } while (!list_empty(list)); 150848debafeSMikulas Patocka } 150948debafeSMikulas Patocka 151048debafeSMikulas Patocka static int writecache_endio_thread(void *data) 151148debafeSMikulas Patocka { 151248debafeSMikulas Patocka struct dm_writecache *wc = data; 151348debafeSMikulas Patocka 151448debafeSMikulas Patocka while (1) { 151548debafeSMikulas Patocka struct list_head list; 151648debafeSMikulas Patocka 151748debafeSMikulas Patocka raw_spin_lock_irq(&wc->endio_list_lock); 151848debafeSMikulas Patocka if (!list_empty(&wc->endio_list)) 151948debafeSMikulas Patocka goto pop_from_list; 152048debafeSMikulas Patocka set_current_state(TASK_INTERRUPTIBLE); 152148debafeSMikulas Patocka raw_spin_unlock_irq(&wc->endio_list_lock); 152248debafeSMikulas Patocka 152348debafeSMikulas Patocka if (unlikely(kthread_should_stop())) { 152448debafeSMikulas Patocka set_current_state(TASK_RUNNING); 152548debafeSMikulas Patocka break; 152648debafeSMikulas Patocka } 152748debafeSMikulas Patocka 152848debafeSMikulas Patocka schedule(); 152948debafeSMikulas Patocka 153048debafeSMikulas Patocka continue; 153148debafeSMikulas Patocka 153248debafeSMikulas Patocka pop_from_list: 153348debafeSMikulas Patocka list = wc->endio_list; 153448debafeSMikulas Patocka list.next->prev = list.prev->next = &list; 153548debafeSMikulas Patocka INIT_LIST_HEAD(&wc->endio_list); 153648debafeSMikulas Patocka raw_spin_unlock_irq(&wc->endio_list_lock); 153748debafeSMikulas Patocka 153848debafeSMikulas Patocka if (!WC_MODE_FUA(wc)) 153948debafeSMikulas Patocka writecache_disk_flush(wc, wc->dev); 154048debafeSMikulas Patocka 154148debafeSMikulas Patocka wc_lock(wc); 154248debafeSMikulas Patocka 154348debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 154448debafeSMikulas Patocka __writecache_endio_pmem(wc, &list); 154548debafeSMikulas Patocka } else { 154648debafeSMikulas Patocka __writecache_endio_ssd(wc, &list); 154748debafeSMikulas Patocka writecache_wait_for_ios(wc, READ); 154848debafeSMikulas Patocka } 154948debafeSMikulas Patocka 1550aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 155148debafeSMikulas Patocka 155248debafeSMikulas Patocka wc_unlock(wc); 155348debafeSMikulas Patocka } 155448debafeSMikulas Patocka 155548debafeSMikulas Patocka return 0; 155648debafeSMikulas Patocka } 155748debafeSMikulas Patocka 155848debafeSMikulas Patocka static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp) 155948debafeSMikulas Patocka { 156048debafeSMikulas Patocka struct dm_writecache *wc = wb->wc; 156148debafeSMikulas Patocka unsigned block_size = wc->block_size; 156248debafeSMikulas Patocka void *address = memory_data(wc, e); 156348debafeSMikulas Patocka 156448debafeSMikulas Patocka persistent_memory_flush_cache(address, block_size); 156548debafeSMikulas Patocka return bio_add_page(&wb->bio, persistent_memory_page(address), 156648debafeSMikulas Patocka block_size, persistent_memory_page_offset(address)) != 0; 156748debafeSMikulas Patocka } 156848debafeSMikulas Patocka 156948debafeSMikulas Patocka struct writeback_list { 157048debafeSMikulas Patocka struct list_head list; 157148debafeSMikulas Patocka size_t size; 157248debafeSMikulas Patocka }; 157348debafeSMikulas Patocka 157448debafeSMikulas Patocka static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl) 157548debafeSMikulas Patocka { 157648debafeSMikulas Patocka if (unlikely(wc->max_writeback_jobs)) { 157748debafeSMikulas Patocka if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) { 157848debafeSMikulas Patocka wc_lock(wc); 157948debafeSMikulas Patocka while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs) 158048debafeSMikulas Patocka writecache_wait_on_freelist(wc); 158148debafeSMikulas Patocka wc_unlock(wc); 158248debafeSMikulas Patocka } 158348debafeSMikulas Patocka } 158448debafeSMikulas Patocka cond_resched(); 158548debafeSMikulas Patocka } 158648debafeSMikulas Patocka 158748debafeSMikulas Patocka static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl) 158848debafeSMikulas Patocka { 158948debafeSMikulas Patocka struct wc_entry *e, *f; 159048debafeSMikulas Patocka struct bio *bio; 159148debafeSMikulas Patocka struct writeback_struct *wb; 159248debafeSMikulas Patocka unsigned max_pages; 159348debafeSMikulas Patocka 159448debafeSMikulas Patocka while (wbl->size) { 159548debafeSMikulas Patocka wbl->size--; 159648debafeSMikulas Patocka e = container_of(wbl->list.prev, struct wc_entry, lru); 159748debafeSMikulas Patocka list_del(&e->lru); 159848debafeSMikulas Patocka 159948debafeSMikulas Patocka max_pages = e->wc_list_contiguous; 160048debafeSMikulas Patocka 160148debafeSMikulas Patocka bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set); 160248debafeSMikulas Patocka wb = container_of(bio, struct writeback_struct, bio); 160348debafeSMikulas Patocka wb->wc = wc; 160409f2d656SHuaisheng Ye bio->bi_end_io = writecache_writeback_endio; 160509f2d656SHuaisheng Ye bio_set_dev(bio, wc->dev->bdev); 160609f2d656SHuaisheng Ye bio->bi_iter.bi_sector = read_original_sector(wc, e); 160748debafeSMikulas Patocka if (max_pages <= WB_LIST_INLINE || 160850a7d3baSKees Cook unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), 160948debafeSMikulas Patocka GFP_NOIO | __GFP_NORETRY | 161048debafeSMikulas Patocka __GFP_NOMEMALLOC | __GFP_NOWARN)))) { 161148debafeSMikulas Patocka wb->wc_list = wb->wc_list_inline; 161248debafeSMikulas Patocka max_pages = WB_LIST_INLINE; 161348debafeSMikulas Patocka } 161448debafeSMikulas Patocka 161548debafeSMikulas Patocka BUG_ON(!wc_add_block(wb, e, GFP_NOIO)); 161648debafeSMikulas Patocka 161748debafeSMikulas Patocka wb->wc_list[0] = e; 161848debafeSMikulas Patocka wb->wc_list_n = 1; 161948debafeSMikulas Patocka 162048debafeSMikulas Patocka while (wbl->size && wb->wc_list_n < max_pages) { 162148debafeSMikulas Patocka f = container_of(wbl->list.prev, struct wc_entry, lru); 162248debafeSMikulas Patocka if (read_original_sector(wc, f) != 162348debafeSMikulas Patocka read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) 162448debafeSMikulas Patocka break; 162548debafeSMikulas Patocka if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN)) 162648debafeSMikulas Patocka break; 162748debafeSMikulas Patocka wbl->size--; 162848debafeSMikulas Patocka list_del(&f->lru); 162948debafeSMikulas Patocka wb->wc_list[wb->wc_list_n++] = f; 163048debafeSMikulas Patocka e = f; 163148debafeSMikulas Patocka } 163209f2d656SHuaisheng Ye bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA); 163348debafeSMikulas Patocka if (writecache_has_error(wc)) { 163448debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 163509f2d656SHuaisheng Ye bio_endio(bio); 163648debafeSMikulas Patocka } else { 163709f2d656SHuaisheng Ye submit_bio(bio); 163848debafeSMikulas Patocka } 163948debafeSMikulas Patocka 164048debafeSMikulas Patocka __writeback_throttle(wc, wbl); 164148debafeSMikulas Patocka } 164248debafeSMikulas Patocka } 164348debafeSMikulas Patocka 164448debafeSMikulas Patocka static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl) 164548debafeSMikulas Patocka { 164648debafeSMikulas Patocka struct wc_entry *e, *f; 164748debafeSMikulas Patocka struct dm_io_region from, to; 164848debafeSMikulas Patocka struct copy_struct *c; 164948debafeSMikulas Patocka 165048debafeSMikulas Patocka while (wbl->size) { 165148debafeSMikulas Patocka unsigned n_sectors; 165248debafeSMikulas Patocka 165348debafeSMikulas Patocka wbl->size--; 165448debafeSMikulas Patocka e = container_of(wbl->list.prev, struct wc_entry, lru); 165548debafeSMikulas Patocka list_del(&e->lru); 165648debafeSMikulas Patocka 165748debafeSMikulas Patocka n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT); 165848debafeSMikulas Patocka 165948debafeSMikulas Patocka from.bdev = wc->ssd_dev->bdev; 166048debafeSMikulas Patocka from.sector = cache_sector(wc, e); 166148debafeSMikulas Patocka from.count = n_sectors; 166248debafeSMikulas Patocka to.bdev = wc->dev->bdev; 166348debafeSMikulas Patocka to.sector = read_original_sector(wc, e); 166448debafeSMikulas Patocka to.count = n_sectors; 166548debafeSMikulas Patocka 166648debafeSMikulas Patocka c = mempool_alloc(&wc->copy_pool, GFP_NOIO); 166748debafeSMikulas Patocka c->wc = wc; 166848debafeSMikulas Patocka c->e = e; 166948debafeSMikulas Patocka c->n_entries = e->wc_list_contiguous; 167048debafeSMikulas Patocka 167148debafeSMikulas Patocka while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) { 167248debafeSMikulas Patocka wbl->size--; 167348debafeSMikulas Patocka f = container_of(wbl->list.prev, struct wc_entry, lru); 167448debafeSMikulas Patocka BUG_ON(f != e + 1); 167548debafeSMikulas Patocka list_del(&f->lru); 167648debafeSMikulas Patocka e = f; 167748debafeSMikulas Patocka } 167848debafeSMikulas Patocka 167948debafeSMikulas Patocka dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c); 168048debafeSMikulas Patocka 168148debafeSMikulas Patocka __writeback_throttle(wc, wbl); 168248debafeSMikulas Patocka } 168348debafeSMikulas Patocka } 168448debafeSMikulas Patocka 168548debafeSMikulas Patocka static void writecache_writeback(struct work_struct *work) 168648debafeSMikulas Patocka { 168748debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work); 168848debafeSMikulas Patocka struct blk_plug plug; 16898dd85873SMikulas Patocka struct wc_entry *f, *uninitialized_var(g), *e = NULL; 169048debafeSMikulas Patocka struct rb_node *node, *next_node; 169148debafeSMikulas Patocka struct list_head skipped; 169248debafeSMikulas Patocka struct writeback_list wbl; 169348debafeSMikulas Patocka unsigned long n_walked; 169448debafeSMikulas Patocka 169548debafeSMikulas Patocka wc_lock(wc); 169648debafeSMikulas Patocka restart: 169748debafeSMikulas Patocka if (writecache_has_error(wc)) { 169848debafeSMikulas Patocka wc_unlock(wc); 169948debafeSMikulas Patocka return; 170048debafeSMikulas Patocka } 170148debafeSMikulas Patocka 170248debafeSMikulas Patocka if (unlikely(wc->writeback_all)) { 170348debafeSMikulas Patocka if (writecache_wait_for_writeback(wc)) 170448debafeSMikulas Patocka goto restart; 170548debafeSMikulas Patocka } 170648debafeSMikulas Patocka 170748debafeSMikulas Patocka if (wc->overwrote_committed) { 170848debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 170948debafeSMikulas Patocka } 171048debafeSMikulas Patocka 171148debafeSMikulas Patocka n_walked = 0; 171248debafeSMikulas Patocka INIT_LIST_HEAD(&skipped); 171348debafeSMikulas Patocka INIT_LIST_HEAD(&wbl.list); 171448debafeSMikulas Patocka wbl.size = 0; 171548debafeSMikulas Patocka while (!list_empty(&wc->lru) && 171648debafeSMikulas Patocka (wc->writeback_all || 17173923d485SMikulas Patocka wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark || 17183923d485SMikulas Patocka (jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >= 17193923d485SMikulas Patocka wc->max_age - wc->max_age / MAX_AGE_DIV))) { 172048debafeSMikulas Patocka 172148debafeSMikulas Patocka n_walked++; 172248debafeSMikulas Patocka if (unlikely(n_walked > WRITEBACK_LATENCY) && 172348debafeSMikulas Patocka likely(!wc->writeback_all) && likely(!dm_suspended(wc->ti))) { 172448debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 172548debafeSMikulas Patocka break; 172648debafeSMikulas Patocka } 172748debafeSMikulas Patocka 17285229b489SHuaisheng Ye if (unlikely(wc->writeback_all)) { 17295229b489SHuaisheng Ye if (unlikely(!e)) { 17305229b489SHuaisheng Ye writecache_flush(wc); 17315229b489SHuaisheng Ye e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node); 17325229b489SHuaisheng Ye } else 17335229b489SHuaisheng Ye e = g; 17345229b489SHuaisheng Ye } else 173548debafeSMikulas Patocka e = container_of(wc->lru.prev, struct wc_entry, lru); 173648debafeSMikulas Patocka BUG_ON(e->write_in_progress); 173748debafeSMikulas Patocka if (unlikely(!writecache_entry_is_committed(wc, e))) { 173848debafeSMikulas Patocka writecache_flush(wc); 173948debafeSMikulas Patocka } 174048debafeSMikulas Patocka node = rb_prev(&e->rb_node); 174148debafeSMikulas Patocka if (node) { 174248debafeSMikulas Patocka f = container_of(node, struct wc_entry, rb_node); 174348debafeSMikulas Patocka if (unlikely(read_original_sector(wc, f) == 174448debafeSMikulas Patocka read_original_sector(wc, e))) { 174548debafeSMikulas Patocka BUG_ON(!f->write_in_progress); 174648debafeSMikulas Patocka list_del(&e->lru); 174748debafeSMikulas Patocka list_add(&e->lru, &skipped); 174848debafeSMikulas Patocka cond_resched(); 174948debafeSMikulas Patocka continue; 175048debafeSMikulas Patocka } 175148debafeSMikulas Patocka } 175248debafeSMikulas Patocka wc->writeback_size++; 175348debafeSMikulas Patocka list_del(&e->lru); 175448debafeSMikulas Patocka list_add(&e->lru, &wbl.list); 175548debafeSMikulas Patocka wbl.size++; 175648debafeSMikulas Patocka e->write_in_progress = true; 175748debafeSMikulas Patocka e->wc_list_contiguous = 1; 175848debafeSMikulas Patocka 175948debafeSMikulas Patocka f = e; 176048debafeSMikulas Patocka 176148debafeSMikulas Patocka while (1) { 176248debafeSMikulas Patocka next_node = rb_next(&f->rb_node); 176348debafeSMikulas Patocka if (unlikely(!next_node)) 176448debafeSMikulas Patocka break; 176548debafeSMikulas Patocka g = container_of(next_node, struct wc_entry, rb_node); 176662421b38SHuaisheng Ye if (unlikely(read_original_sector(wc, g) == 176762421b38SHuaisheng Ye read_original_sector(wc, f))) { 176848debafeSMikulas Patocka f = g; 176948debafeSMikulas Patocka continue; 177048debafeSMikulas Patocka } 177148debafeSMikulas Patocka if (read_original_sector(wc, g) != 177248debafeSMikulas Patocka read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT)) 177348debafeSMikulas Patocka break; 177448debafeSMikulas Patocka if (unlikely(g->write_in_progress)) 177548debafeSMikulas Patocka break; 177648debafeSMikulas Patocka if (unlikely(!writecache_entry_is_committed(wc, g))) 177748debafeSMikulas Patocka break; 177848debafeSMikulas Patocka 177948debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) { 178048debafeSMikulas Patocka if (g != f + 1) 178148debafeSMikulas Patocka break; 178248debafeSMikulas Patocka } 178348debafeSMikulas Patocka 178448debafeSMikulas Patocka n_walked++; 178548debafeSMikulas Patocka //if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all)) 178648debafeSMikulas Patocka // break; 178748debafeSMikulas Patocka 178848debafeSMikulas Patocka wc->writeback_size++; 178948debafeSMikulas Patocka list_del(&g->lru); 179048debafeSMikulas Patocka list_add(&g->lru, &wbl.list); 179148debafeSMikulas Patocka wbl.size++; 179248debafeSMikulas Patocka g->write_in_progress = true; 179348debafeSMikulas Patocka g->wc_list_contiguous = BIO_MAX_PAGES; 179448debafeSMikulas Patocka f = g; 179548debafeSMikulas Patocka e->wc_list_contiguous++; 17965229b489SHuaisheng Ye if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) { 17975229b489SHuaisheng Ye if (unlikely(wc->writeback_all)) { 17985229b489SHuaisheng Ye next_node = rb_next(&f->rb_node); 17995229b489SHuaisheng Ye if (likely(next_node)) 18005229b489SHuaisheng Ye g = container_of(next_node, struct wc_entry, rb_node); 18015229b489SHuaisheng Ye } 180248debafeSMikulas Patocka break; 180348debafeSMikulas Patocka } 18045229b489SHuaisheng Ye } 180548debafeSMikulas Patocka cond_resched(); 180648debafeSMikulas Patocka } 180748debafeSMikulas Patocka 180848debafeSMikulas Patocka if (!list_empty(&skipped)) { 180948debafeSMikulas Patocka list_splice_tail(&skipped, &wc->lru); 181048debafeSMikulas Patocka /* 181148debafeSMikulas Patocka * If we didn't do any progress, we must wait until some 181248debafeSMikulas Patocka * writeback finishes to avoid burning CPU in a loop 181348debafeSMikulas Patocka */ 181448debafeSMikulas Patocka if (unlikely(!wbl.size)) 181548debafeSMikulas Patocka writecache_wait_for_writeback(wc); 181648debafeSMikulas Patocka } 181748debafeSMikulas Patocka 181848debafeSMikulas Patocka wc_unlock(wc); 181948debafeSMikulas Patocka 182048debafeSMikulas Patocka blk_start_plug(&plug); 182148debafeSMikulas Patocka 182248debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 182348debafeSMikulas Patocka __writecache_writeback_pmem(wc, &wbl); 182448debafeSMikulas Patocka else 182548debafeSMikulas Patocka __writecache_writeback_ssd(wc, &wbl); 182648debafeSMikulas Patocka 182748debafeSMikulas Patocka blk_finish_plug(&plug); 182848debafeSMikulas Patocka 182948debafeSMikulas Patocka if (unlikely(wc->writeback_all)) { 183048debafeSMikulas Patocka wc_lock(wc); 183148debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 183248debafeSMikulas Patocka wc_unlock(wc); 183348debafeSMikulas Patocka } 183448debafeSMikulas Patocka } 183548debafeSMikulas Patocka 183648debafeSMikulas Patocka static int calculate_memory_size(uint64_t device_size, unsigned block_size, 183748debafeSMikulas Patocka size_t *n_blocks_p, size_t *n_metadata_blocks_p) 183848debafeSMikulas Patocka { 183948debafeSMikulas Patocka uint64_t n_blocks, offset; 184048debafeSMikulas Patocka struct wc_entry e; 184148debafeSMikulas Patocka 184248debafeSMikulas Patocka n_blocks = device_size; 184348debafeSMikulas Patocka do_div(n_blocks, block_size + sizeof(struct wc_memory_entry)); 184448debafeSMikulas Patocka 184548debafeSMikulas Patocka while (1) { 184648debafeSMikulas Patocka if (!n_blocks) 184748debafeSMikulas Patocka return -ENOSPC; 184848debafeSMikulas Patocka /* Verify the following entries[n_blocks] won't overflow */ 184948debafeSMikulas Patocka if (n_blocks >= ((size_t)-sizeof(struct wc_memory_superblock) / 185048debafeSMikulas Patocka sizeof(struct wc_memory_entry))) 185148debafeSMikulas Patocka return -EFBIG; 185248debafeSMikulas Patocka offset = offsetof(struct wc_memory_superblock, entries[n_blocks]); 185348debafeSMikulas Patocka offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1); 185448debafeSMikulas Patocka if (offset + n_blocks * block_size <= device_size) 185548debafeSMikulas Patocka break; 185648debafeSMikulas Patocka n_blocks--; 185748debafeSMikulas Patocka } 185848debafeSMikulas Patocka 185948debafeSMikulas Patocka /* check if the bit field overflows */ 186048debafeSMikulas Patocka e.index = n_blocks; 186148debafeSMikulas Patocka if (e.index != n_blocks) 186248debafeSMikulas Patocka return -EFBIG; 186348debafeSMikulas Patocka 186448debafeSMikulas Patocka if (n_blocks_p) 186548debafeSMikulas Patocka *n_blocks_p = n_blocks; 186648debafeSMikulas Patocka if (n_metadata_blocks_p) 186748debafeSMikulas Patocka *n_metadata_blocks_p = offset >> __ffs(block_size); 186848debafeSMikulas Patocka return 0; 186948debafeSMikulas Patocka } 187048debafeSMikulas Patocka 187148debafeSMikulas Patocka static int init_memory(struct dm_writecache *wc) 187248debafeSMikulas Patocka { 187348debafeSMikulas Patocka size_t b; 187448debafeSMikulas Patocka int r; 187548debafeSMikulas Patocka 187648debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL); 187748debafeSMikulas Patocka if (r) 187848debafeSMikulas Patocka return r; 187948debafeSMikulas Patocka 188048debafeSMikulas Patocka r = writecache_alloc_entries(wc); 188148debafeSMikulas Patocka if (r) 188248debafeSMikulas Patocka return r; 188348debafeSMikulas Patocka 188448debafeSMikulas Patocka for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++) 188548debafeSMikulas Patocka pmem_assign(sb(wc)->padding[b], cpu_to_le64(0)); 188648debafeSMikulas Patocka pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION)); 188748debafeSMikulas Patocka pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size)); 188848debafeSMikulas Patocka pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks)); 188948debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(0)); 189048debafeSMikulas Patocka 1891*1edaa447SMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 189248debafeSMikulas Patocka write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); 1893*1edaa447SMikulas Patocka cond_resched(); 1894*1edaa447SMikulas Patocka } 189548debafeSMikulas Patocka 189648debafeSMikulas Patocka writecache_flush_all_metadata(wc); 1897aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 189848debafeSMikulas Patocka pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); 189948debafeSMikulas Patocka writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); 1900aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 190148debafeSMikulas Patocka 190248debafeSMikulas Patocka return 0; 190348debafeSMikulas Patocka } 190448debafeSMikulas Patocka 190548debafeSMikulas Patocka static void writecache_dtr(struct dm_target *ti) 190648debafeSMikulas Patocka { 190748debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 190848debafeSMikulas Patocka 190948debafeSMikulas Patocka if (!wc) 191048debafeSMikulas Patocka return; 191148debafeSMikulas Patocka 191248debafeSMikulas Patocka if (wc->endio_thread) 191348debafeSMikulas Patocka kthread_stop(wc->endio_thread); 191448debafeSMikulas Patocka 191548debafeSMikulas Patocka if (wc->flush_thread) 191648debafeSMikulas Patocka kthread_stop(wc->flush_thread); 191748debafeSMikulas Patocka 191848debafeSMikulas Patocka bioset_exit(&wc->bio_set); 191948debafeSMikulas Patocka 192048debafeSMikulas Patocka mempool_exit(&wc->copy_pool); 192148debafeSMikulas Patocka 192248debafeSMikulas Patocka if (wc->writeback_wq) 192348debafeSMikulas Patocka destroy_workqueue(wc->writeback_wq); 192448debafeSMikulas Patocka 192548debafeSMikulas Patocka if (wc->dev) 192648debafeSMikulas Patocka dm_put_device(ti, wc->dev); 192748debafeSMikulas Patocka 192848debafeSMikulas Patocka if (wc->ssd_dev) 192948debafeSMikulas Patocka dm_put_device(ti, wc->ssd_dev); 193048debafeSMikulas Patocka 193148debafeSMikulas Patocka if (wc->entries) 193248debafeSMikulas Patocka vfree(wc->entries); 193348debafeSMikulas Patocka 193448debafeSMikulas Patocka if (wc->memory_map) { 193548debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 193648debafeSMikulas Patocka persistent_memory_release(wc); 193748debafeSMikulas Patocka else 193848debafeSMikulas Patocka vfree(wc->memory_map); 193948debafeSMikulas Patocka } 194048debafeSMikulas Patocka 194148debafeSMikulas Patocka if (wc->dm_kcopyd) 194248debafeSMikulas Patocka dm_kcopyd_client_destroy(wc->dm_kcopyd); 194348debafeSMikulas Patocka 194448debafeSMikulas Patocka if (wc->dm_io) 194548debafeSMikulas Patocka dm_io_client_destroy(wc->dm_io); 194648debafeSMikulas Patocka 194748debafeSMikulas Patocka if (wc->dirty_bitmap) 194848debafeSMikulas Patocka vfree(wc->dirty_bitmap); 194948debafeSMikulas Patocka 195048debafeSMikulas Patocka kfree(wc); 195148debafeSMikulas Patocka } 195248debafeSMikulas Patocka 195348debafeSMikulas Patocka static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) 195448debafeSMikulas Patocka { 195548debafeSMikulas Patocka struct dm_writecache *wc; 195648debafeSMikulas Patocka struct dm_arg_set as; 195748debafeSMikulas Patocka const char *string; 195848debafeSMikulas Patocka unsigned opt_params; 195948debafeSMikulas Patocka size_t offset, data_size; 196048debafeSMikulas Patocka int i, r; 196148debafeSMikulas Patocka char dummy; 196248debafeSMikulas Patocka int high_wm_percent = HIGH_WATERMARK; 196348debafeSMikulas Patocka int low_wm_percent = LOW_WATERMARK; 196448debafeSMikulas Patocka uint64_t x; 196548debafeSMikulas Patocka struct wc_memory_superblock s; 196648debafeSMikulas Patocka 196748debafeSMikulas Patocka static struct dm_arg _args[] = { 196848debafeSMikulas Patocka {0, 10, "Invalid number of feature args"}, 196948debafeSMikulas Patocka }; 197048debafeSMikulas Patocka 197148debafeSMikulas Patocka as.argc = argc; 197248debafeSMikulas Patocka as.argv = argv; 197348debafeSMikulas Patocka 197448debafeSMikulas Patocka wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL); 197548debafeSMikulas Patocka if (!wc) { 197648debafeSMikulas Patocka ti->error = "Cannot allocate writecache structure"; 197748debafeSMikulas Patocka r = -ENOMEM; 197848debafeSMikulas Patocka goto bad; 197948debafeSMikulas Patocka } 198048debafeSMikulas Patocka ti->private = wc; 198148debafeSMikulas Patocka wc->ti = ti; 198248debafeSMikulas Patocka 198348debafeSMikulas Patocka mutex_init(&wc->lock); 19843923d485SMikulas Patocka wc->max_age = MAX_AGE_UNSPECIFIED; 198548debafeSMikulas Patocka writecache_poison_lists(wc); 198648debafeSMikulas Patocka init_waitqueue_head(&wc->freelist_wait); 198748debafeSMikulas Patocka timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0); 19883923d485SMikulas Patocka timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0); 198948debafeSMikulas Patocka 199048debafeSMikulas Patocka for (i = 0; i < 2; i++) { 199148debafeSMikulas Patocka atomic_set(&wc->bio_in_progress[i], 0); 199248debafeSMikulas Patocka init_waitqueue_head(&wc->bio_in_progress_wait[i]); 199348debafeSMikulas Patocka } 199448debafeSMikulas Patocka 199548debafeSMikulas Patocka wc->dm_io = dm_io_client_create(); 199648debafeSMikulas Patocka if (IS_ERR(wc->dm_io)) { 199748debafeSMikulas Patocka r = PTR_ERR(wc->dm_io); 199848debafeSMikulas Patocka ti->error = "Unable to allocate dm-io client"; 199948debafeSMikulas Patocka wc->dm_io = NULL; 200048debafeSMikulas Patocka goto bad; 200148debafeSMikulas Patocka } 200248debafeSMikulas Patocka 2003f87e033bSHuaisheng Ye wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); 200448debafeSMikulas Patocka if (!wc->writeback_wq) { 200548debafeSMikulas Patocka r = -ENOMEM; 200648debafeSMikulas Patocka ti->error = "Could not allocate writeback workqueue"; 200748debafeSMikulas Patocka goto bad; 200848debafeSMikulas Patocka } 200948debafeSMikulas Patocka INIT_WORK(&wc->writeback_work, writecache_writeback); 201048debafeSMikulas Patocka INIT_WORK(&wc->flush_work, writecache_flush_work); 201148debafeSMikulas Patocka 201248debafeSMikulas Patocka raw_spin_lock_init(&wc->endio_list_lock); 201348debafeSMikulas Patocka INIT_LIST_HEAD(&wc->endio_list); 201448debafeSMikulas Patocka wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio"); 201548debafeSMikulas Patocka if (IS_ERR(wc->endio_thread)) { 201648debafeSMikulas Patocka r = PTR_ERR(wc->endio_thread); 201748debafeSMikulas Patocka wc->endio_thread = NULL; 201848debafeSMikulas Patocka ti->error = "Couldn't spawn endio thread"; 201948debafeSMikulas Patocka goto bad; 202048debafeSMikulas Patocka } 202148debafeSMikulas Patocka wake_up_process(wc->endio_thread); 202248debafeSMikulas Patocka 202348debafeSMikulas Patocka /* 202448debafeSMikulas Patocka * Parse the mode (pmem or ssd) 202548debafeSMikulas Patocka */ 202648debafeSMikulas Patocka string = dm_shift_arg(&as); 202748debafeSMikulas Patocka if (!string) 202848debafeSMikulas Patocka goto bad_arguments; 202948debafeSMikulas Patocka 203048debafeSMikulas Patocka if (!strcasecmp(string, "s")) { 203148debafeSMikulas Patocka wc->pmem_mode = false; 203248debafeSMikulas Patocka } else if (!strcasecmp(string, "p")) { 203348debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 203448debafeSMikulas Patocka wc->pmem_mode = true; 203548debafeSMikulas Patocka wc->writeback_fua = true; 203648debafeSMikulas Patocka #else 203748debafeSMikulas Patocka /* 203848debafeSMikulas Patocka * If the architecture doesn't support persistent memory or 203948debafeSMikulas Patocka * the kernel doesn't support any DAX drivers, this driver can 204048debafeSMikulas Patocka * only be used in SSD-only mode. 204148debafeSMikulas Patocka */ 204248debafeSMikulas Patocka r = -EOPNOTSUPP; 204348debafeSMikulas Patocka ti->error = "Persistent memory or DAX not supported on this system"; 204448debafeSMikulas Patocka goto bad; 204548debafeSMikulas Patocka #endif 204648debafeSMikulas Patocka } else { 204748debafeSMikulas Patocka goto bad_arguments; 204848debafeSMikulas Patocka } 204948debafeSMikulas Patocka 205048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 205148debafeSMikulas Patocka r = bioset_init(&wc->bio_set, BIO_POOL_SIZE, 205248debafeSMikulas Patocka offsetof(struct writeback_struct, bio), 205348debafeSMikulas Patocka BIOSET_NEED_BVECS); 205448debafeSMikulas Patocka if (r) { 205548debafeSMikulas Patocka ti->error = "Could not allocate bio set"; 205648debafeSMikulas Patocka goto bad; 205748debafeSMikulas Patocka } 205848debafeSMikulas Patocka } else { 205948debafeSMikulas Patocka r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct)); 206048debafeSMikulas Patocka if (r) { 206148debafeSMikulas Patocka ti->error = "Could not allocate mempool"; 206248debafeSMikulas Patocka goto bad; 206348debafeSMikulas Patocka } 206448debafeSMikulas Patocka } 206548debafeSMikulas Patocka 206648debafeSMikulas Patocka /* 206748debafeSMikulas Patocka * Parse the origin data device 206848debafeSMikulas Patocka */ 206948debafeSMikulas Patocka string = dm_shift_arg(&as); 207048debafeSMikulas Patocka if (!string) 207148debafeSMikulas Patocka goto bad_arguments; 207248debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev); 207348debafeSMikulas Patocka if (r) { 207448debafeSMikulas Patocka ti->error = "Origin data device lookup failed"; 207548debafeSMikulas Patocka goto bad; 207648debafeSMikulas Patocka } 207748debafeSMikulas Patocka 207848debafeSMikulas Patocka /* 207948debafeSMikulas Patocka * Parse cache data device (be it pmem or ssd) 208048debafeSMikulas Patocka */ 208148debafeSMikulas Patocka string = dm_shift_arg(&as); 208248debafeSMikulas Patocka if (!string) 208348debafeSMikulas Patocka goto bad_arguments; 208448debafeSMikulas Patocka 208548debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev); 208648debafeSMikulas Patocka if (r) { 208748debafeSMikulas Patocka ti->error = "Cache data device lookup failed"; 208848debafeSMikulas Patocka goto bad; 208948debafeSMikulas Patocka } 209048debafeSMikulas Patocka wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode); 209148debafeSMikulas Patocka 209248debafeSMikulas Patocka /* 209348debafeSMikulas Patocka * Parse the cache block size 209448debafeSMikulas Patocka */ 209548debafeSMikulas Patocka string = dm_shift_arg(&as); 209648debafeSMikulas Patocka if (!string) 209748debafeSMikulas Patocka goto bad_arguments; 209848debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 || 209948debafeSMikulas Patocka wc->block_size < 512 || wc->block_size > PAGE_SIZE || 210048debafeSMikulas Patocka (wc->block_size & (wc->block_size - 1))) { 210148debafeSMikulas Patocka r = -EINVAL; 210248debafeSMikulas Patocka ti->error = "Invalid block size"; 210348debafeSMikulas Patocka goto bad; 210448debafeSMikulas Patocka } 210548debafeSMikulas Patocka wc->block_size_bits = __ffs(wc->block_size); 210648debafeSMikulas Patocka 210748debafeSMikulas Patocka wc->max_writeback_jobs = MAX_WRITEBACK_JOBS; 210848debafeSMikulas Patocka wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM; 210948debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC); 211048debafeSMikulas Patocka 211148debafeSMikulas Patocka /* 211248debafeSMikulas Patocka * Parse optional arguments 211348debafeSMikulas Patocka */ 211448debafeSMikulas Patocka r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); 211548debafeSMikulas Patocka if (r) 211648debafeSMikulas Patocka goto bad; 211748debafeSMikulas Patocka 211848debafeSMikulas Patocka while (opt_params) { 211948debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2120d284f824SMikulas Patocka if (!strcasecmp(string, "start_sector") && opt_params >= 1) { 2121d284f824SMikulas Patocka unsigned long long start_sector; 2122d284f824SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2123d284f824SMikulas Patocka if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) 2124d284f824SMikulas Patocka goto invalid_optional; 2125d284f824SMikulas Patocka wc->start_sector = start_sector; 2126d284f824SMikulas Patocka if (wc->start_sector != start_sector || 2127d284f824SMikulas Patocka wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) 2128d284f824SMikulas Patocka goto invalid_optional; 2129d284f824SMikulas Patocka } else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) { 213048debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 213148debafeSMikulas Patocka if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1) 213248debafeSMikulas Patocka goto invalid_optional; 213348debafeSMikulas Patocka if (high_wm_percent < 0 || high_wm_percent > 100) 213448debafeSMikulas Patocka goto invalid_optional; 213548debafeSMikulas Patocka wc->high_wm_percent_set = true; 213648debafeSMikulas Patocka } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { 213748debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 213848debafeSMikulas Patocka if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1) 213948debafeSMikulas Patocka goto invalid_optional; 214048debafeSMikulas Patocka if (low_wm_percent < 0 || low_wm_percent > 100) 214148debafeSMikulas Patocka goto invalid_optional; 214248debafeSMikulas Patocka wc->low_wm_percent_set = true; 214348debafeSMikulas Patocka } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { 214448debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 214548debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1) 214648debafeSMikulas Patocka goto invalid_optional; 214748debafeSMikulas Patocka wc->max_writeback_jobs_set = true; 214848debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) { 214948debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 215048debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1) 215148debafeSMikulas Patocka goto invalid_optional; 215248debafeSMikulas Patocka wc->autocommit_blocks_set = true; 215348debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) { 215448debafeSMikulas Patocka unsigned autocommit_msecs; 215548debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 215648debafeSMikulas Patocka if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1) 215748debafeSMikulas Patocka goto invalid_optional; 215848debafeSMikulas Patocka if (autocommit_msecs > 3600000) 215948debafeSMikulas Patocka goto invalid_optional; 216048debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); 216148debafeSMikulas Patocka wc->autocommit_time_set = true; 21623923d485SMikulas Patocka } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { 21633923d485SMikulas Patocka unsigned max_age_msecs; 21643923d485SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 21653923d485SMikulas Patocka if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1) 21663923d485SMikulas Patocka goto invalid_optional; 21673923d485SMikulas Patocka if (max_age_msecs > 86400000) 21683923d485SMikulas Patocka goto invalid_optional; 21693923d485SMikulas Patocka wc->max_age = msecs_to_jiffies(max_age_msecs); 217093de44ebSMikulas Patocka } else if (!strcasecmp(string, "cleaner")) { 217193de44ebSMikulas Patocka wc->cleaner = true; 217248debafeSMikulas Patocka } else if (!strcasecmp(string, "fua")) { 217348debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 217448debafeSMikulas Patocka wc->writeback_fua = true; 217548debafeSMikulas Patocka wc->writeback_fua_set = true; 217648debafeSMikulas Patocka } else goto invalid_optional; 217748debafeSMikulas Patocka } else if (!strcasecmp(string, "nofua")) { 217848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 217948debafeSMikulas Patocka wc->writeback_fua = false; 218048debafeSMikulas Patocka wc->writeback_fua_set = true; 218148debafeSMikulas Patocka } else goto invalid_optional; 218248debafeSMikulas Patocka } else { 218348debafeSMikulas Patocka invalid_optional: 218448debafeSMikulas Patocka r = -EINVAL; 218548debafeSMikulas Patocka ti->error = "Invalid optional argument"; 218648debafeSMikulas Patocka goto bad; 218748debafeSMikulas Patocka } 218848debafeSMikulas Patocka } 218948debafeSMikulas Patocka 219048debafeSMikulas Patocka if (high_wm_percent < low_wm_percent) { 219148debafeSMikulas Patocka r = -EINVAL; 219248debafeSMikulas Patocka ti->error = "High watermark must be greater than or equal to low watermark"; 219348debafeSMikulas Patocka goto bad; 219448debafeSMikulas Patocka } 219548debafeSMikulas Patocka 2196d284f824SMikulas Patocka if (WC_MODE_PMEM(wc)) { 2197d284f824SMikulas Patocka r = persistent_memory_claim(wc); 2198d284f824SMikulas Patocka if (r) { 2199d284f824SMikulas Patocka ti->error = "Unable to map persistent memory for cache"; 2200d284f824SMikulas Patocka goto bad; 2201d284f824SMikulas Patocka } 2202d284f824SMikulas Patocka } else { 220348debafeSMikulas Patocka struct dm_io_region region; 220448debafeSMikulas Patocka struct dm_io_request req; 220548debafeSMikulas Patocka size_t n_blocks, n_metadata_blocks; 220648debafeSMikulas Patocka uint64_t n_bitmap_bits; 220748debafeSMikulas Patocka 2208d284f824SMikulas Patocka wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT; 2209d284f824SMikulas Patocka 221048debafeSMikulas Patocka bio_list_init(&wc->flush_list); 221148debafeSMikulas Patocka wc->flush_thread = kthread_create(writecache_flush_thread, wc, "dm_writecache_flush"); 221248debafeSMikulas Patocka if (IS_ERR(wc->flush_thread)) { 221348debafeSMikulas Patocka r = PTR_ERR(wc->flush_thread); 221448debafeSMikulas Patocka wc->flush_thread = NULL; 2215e8ea141aSShenghui Wang ti->error = "Couldn't spawn flush thread"; 221648debafeSMikulas Patocka goto bad; 221748debafeSMikulas Patocka } 221848debafeSMikulas Patocka wake_up_process(wc->flush_thread); 221948debafeSMikulas Patocka 222048debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, 222148debafeSMikulas Patocka &n_blocks, &n_metadata_blocks); 222248debafeSMikulas Patocka if (r) { 222348debafeSMikulas Patocka ti->error = "Invalid device size"; 222448debafeSMikulas Patocka goto bad; 222548debafeSMikulas Patocka } 222648debafeSMikulas Patocka 222748debafeSMikulas Patocka n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + 222848debafeSMikulas Patocka BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY; 222948debafeSMikulas Patocka /* this is limitation of test_bit functions */ 223048debafeSMikulas Patocka if (n_bitmap_bits > 1U << 31) { 223148debafeSMikulas Patocka r = -EFBIG; 223248debafeSMikulas Patocka ti->error = "Invalid device size"; 223348debafeSMikulas Patocka goto bad; 223448debafeSMikulas Patocka } 223548debafeSMikulas Patocka 223648debafeSMikulas Patocka wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits); 223748debafeSMikulas Patocka if (!wc->memory_map) { 223848debafeSMikulas Patocka r = -ENOMEM; 223948debafeSMikulas Patocka ti->error = "Unable to allocate memory for metadata"; 224048debafeSMikulas Patocka goto bad; 224148debafeSMikulas Patocka } 224248debafeSMikulas Patocka 224348debafeSMikulas Patocka wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle); 224448debafeSMikulas Patocka if (IS_ERR(wc->dm_kcopyd)) { 224548debafeSMikulas Patocka r = PTR_ERR(wc->dm_kcopyd); 224648debafeSMikulas Patocka ti->error = "Unable to allocate dm-kcopyd client"; 224748debafeSMikulas Patocka wc->dm_kcopyd = NULL; 224848debafeSMikulas Patocka goto bad; 224948debafeSMikulas Patocka } 225048debafeSMikulas Patocka 225148debafeSMikulas Patocka wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT); 225248debafeSMikulas Patocka wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / 225348debafeSMikulas Patocka BITS_PER_LONG * sizeof(unsigned long); 225448debafeSMikulas Patocka wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size); 225548debafeSMikulas Patocka if (!wc->dirty_bitmap) { 225648debafeSMikulas Patocka r = -ENOMEM; 225748debafeSMikulas Patocka ti->error = "Unable to allocate dirty bitmap"; 225848debafeSMikulas Patocka goto bad; 225948debafeSMikulas Patocka } 226048debafeSMikulas Patocka 226148debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 2262d284f824SMikulas Patocka region.sector = wc->start_sector; 226348debafeSMikulas Patocka region.count = wc->metadata_sectors; 226448debafeSMikulas Patocka req.bi_op = REQ_OP_READ; 226548debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 226648debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 226748debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map; 226848debafeSMikulas Patocka req.client = wc->dm_io; 226948debafeSMikulas Patocka req.notify.fn = NULL; 227048debafeSMikulas Patocka 227148debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 227248debafeSMikulas Patocka if (r) { 227348debafeSMikulas Patocka ti->error = "Unable to read metadata"; 227448debafeSMikulas Patocka goto bad; 227548debafeSMikulas Patocka } 227648debafeSMikulas Patocka } 227748debafeSMikulas Patocka 227848debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 227948debafeSMikulas Patocka if (r) { 228048debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 228148debafeSMikulas Patocka goto bad; 228248debafeSMikulas Patocka } 228348debafeSMikulas Patocka if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) { 228448debafeSMikulas Patocka r = init_memory(wc); 228548debafeSMikulas Patocka if (r) { 228648debafeSMikulas Patocka ti->error = "Unable to initialize device"; 228748debafeSMikulas Patocka goto bad; 228848debafeSMikulas Patocka } 228948debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 229048debafeSMikulas Patocka if (r) { 229148debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 229248debafeSMikulas Patocka goto bad; 229348debafeSMikulas Patocka } 229448debafeSMikulas Patocka } 229548debafeSMikulas Patocka 229648debafeSMikulas Patocka if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) { 229748debafeSMikulas Patocka ti->error = "Invalid magic in the superblock"; 229848debafeSMikulas Patocka r = -EINVAL; 229948debafeSMikulas Patocka goto bad; 230048debafeSMikulas Patocka } 230148debafeSMikulas Patocka 230248debafeSMikulas Patocka if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) { 230348debafeSMikulas Patocka ti->error = "Invalid version in the superblock"; 230448debafeSMikulas Patocka r = -EINVAL; 230548debafeSMikulas Patocka goto bad; 230648debafeSMikulas Patocka } 230748debafeSMikulas Patocka 230848debafeSMikulas Patocka if (le32_to_cpu(s.block_size) != wc->block_size) { 230948debafeSMikulas Patocka ti->error = "Block size does not match superblock"; 231048debafeSMikulas Patocka r = -EINVAL; 231148debafeSMikulas Patocka goto bad; 231248debafeSMikulas Patocka } 231348debafeSMikulas Patocka 231448debafeSMikulas Patocka wc->n_blocks = le64_to_cpu(s.n_blocks); 231548debafeSMikulas Patocka 231648debafeSMikulas Patocka offset = wc->n_blocks * sizeof(struct wc_memory_entry); 231748debafeSMikulas Patocka if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) { 231848debafeSMikulas Patocka overflow: 231948debafeSMikulas Patocka ti->error = "Overflow in size calculation"; 232048debafeSMikulas Patocka r = -EINVAL; 232148debafeSMikulas Patocka goto bad; 232248debafeSMikulas Patocka } 232348debafeSMikulas Patocka offset += sizeof(struct wc_memory_superblock); 232448debafeSMikulas Patocka if (offset < sizeof(struct wc_memory_superblock)) 232548debafeSMikulas Patocka goto overflow; 232648debafeSMikulas Patocka offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1); 232748debafeSMikulas Patocka data_size = wc->n_blocks * (size_t)wc->block_size; 232848debafeSMikulas Patocka if (!offset || (data_size / wc->block_size != wc->n_blocks) || 232948debafeSMikulas Patocka (offset + data_size < offset)) 233048debafeSMikulas Patocka goto overflow; 233148debafeSMikulas Patocka if (offset + data_size > wc->memory_map_size) { 233248debafeSMikulas Patocka ti->error = "Memory area is too small"; 233348debafeSMikulas Patocka r = -EINVAL; 233448debafeSMikulas Patocka goto bad; 233548debafeSMikulas Patocka } 233648debafeSMikulas Patocka 233748debafeSMikulas Patocka wc->metadata_sectors = offset >> SECTOR_SHIFT; 233848debafeSMikulas Patocka wc->block_start = (char *)sb(wc) + offset; 233948debafeSMikulas Patocka 234048debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - high_wm_percent); 234148debafeSMikulas Patocka x += 50; 234248debafeSMikulas Patocka do_div(x, 100); 234348debafeSMikulas Patocka wc->freelist_high_watermark = x; 234448debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - low_wm_percent); 234548debafeSMikulas Patocka x += 50; 234648debafeSMikulas Patocka do_div(x, 100); 234748debafeSMikulas Patocka wc->freelist_low_watermark = x; 234848debafeSMikulas Patocka 234993de44ebSMikulas Patocka if (wc->cleaner) 235093de44ebSMikulas Patocka activate_cleaner(wc); 235193de44ebSMikulas Patocka 235248debafeSMikulas Patocka r = writecache_alloc_entries(wc); 235348debafeSMikulas Patocka if (r) { 235448debafeSMikulas Patocka ti->error = "Cannot allocate memory"; 235548debafeSMikulas Patocka goto bad; 235648debafeSMikulas Patocka } 235748debafeSMikulas Patocka 235848debafeSMikulas Patocka ti->num_flush_bios = 1; 235948debafeSMikulas Patocka ti->flush_supported = true; 236048debafeSMikulas Patocka ti->num_discard_bios = 1; 236148debafeSMikulas Patocka 236248debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 236348debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 236448debafeSMikulas Patocka 236548debafeSMikulas Patocka return 0; 236648debafeSMikulas Patocka 236748debafeSMikulas Patocka bad_arguments: 236848debafeSMikulas Patocka r = -EINVAL; 236948debafeSMikulas Patocka ti->error = "Bad arguments"; 237048debafeSMikulas Patocka bad: 237148debafeSMikulas Patocka writecache_dtr(ti); 237248debafeSMikulas Patocka return r; 237348debafeSMikulas Patocka } 237448debafeSMikulas Patocka 237548debafeSMikulas Patocka static void writecache_status(struct dm_target *ti, status_type_t type, 237648debafeSMikulas Patocka unsigned status_flags, char *result, unsigned maxlen) 237748debafeSMikulas Patocka { 237848debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 237948debafeSMikulas Patocka unsigned extra_args; 238048debafeSMikulas Patocka unsigned sz = 0; 238148debafeSMikulas Patocka uint64_t x; 238248debafeSMikulas Patocka 238348debafeSMikulas Patocka switch (type) { 238448debafeSMikulas Patocka case STATUSTYPE_INFO: 238548debafeSMikulas Patocka DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), 238648debafeSMikulas Patocka (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size, 238748debafeSMikulas Patocka (unsigned long long)wc->writeback_size); 238848debafeSMikulas Patocka break; 238948debafeSMikulas Patocka case STATUSTYPE_TABLE: 239048debafeSMikulas Patocka DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', 239148debafeSMikulas Patocka wc->dev->name, wc->ssd_dev->name, wc->block_size); 239248debafeSMikulas Patocka extra_args = 0; 23939ff07e7dSMikulas Patocka if (wc->start_sector) 23949ff07e7dSMikulas Patocka extra_args += 2; 239593de44ebSMikulas Patocka if (wc->high_wm_percent_set && !wc->cleaner) 239648debafeSMikulas Patocka extra_args += 2; 239793de44ebSMikulas Patocka if (wc->low_wm_percent_set && !wc->cleaner) 239848debafeSMikulas Patocka extra_args += 2; 239948debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 240048debafeSMikulas Patocka extra_args += 2; 240148debafeSMikulas Patocka if (wc->autocommit_blocks_set) 240248debafeSMikulas Patocka extra_args += 2; 240348debafeSMikulas Patocka if (wc->autocommit_time_set) 240448debafeSMikulas Patocka extra_args += 2; 240593de44ebSMikulas Patocka if (wc->cleaner) 240693de44ebSMikulas Patocka extra_args++; 240748debafeSMikulas Patocka if (wc->writeback_fua_set) 240848debafeSMikulas Patocka extra_args++; 240948debafeSMikulas Patocka 241048debafeSMikulas Patocka DMEMIT("%u", extra_args); 24119ff07e7dSMikulas Patocka if (wc->start_sector) 24129ff07e7dSMikulas Patocka DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); 241393de44ebSMikulas Patocka if (wc->high_wm_percent_set && !wc->cleaner) { 241448debafeSMikulas Patocka x = (uint64_t)wc->freelist_high_watermark * 100; 241548debafeSMikulas Patocka x += wc->n_blocks / 2; 241648debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 241748debafeSMikulas Patocka DMEMIT(" high_watermark %u", 100 - (unsigned)x); 241848debafeSMikulas Patocka } 241993de44ebSMikulas Patocka if (wc->low_wm_percent_set && !wc->cleaner) { 242048debafeSMikulas Patocka x = (uint64_t)wc->freelist_low_watermark * 100; 242148debafeSMikulas Patocka x += wc->n_blocks / 2; 242248debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 242348debafeSMikulas Patocka DMEMIT(" low_watermark %u", 100 - (unsigned)x); 242448debafeSMikulas Patocka } 242548debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 242648debafeSMikulas Patocka DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); 242748debafeSMikulas Patocka if (wc->autocommit_blocks_set) 242848debafeSMikulas Patocka DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); 242948debafeSMikulas Patocka if (wc->autocommit_time_set) 243048debafeSMikulas Patocka DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); 24313923d485SMikulas Patocka if (wc->max_age != MAX_AGE_UNSPECIFIED) 24323923d485SMikulas Patocka DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age)); 243393de44ebSMikulas Patocka if (wc->cleaner) 243493de44ebSMikulas Patocka DMEMIT(" cleaner"); 243548debafeSMikulas Patocka if (wc->writeback_fua_set) 243648debafeSMikulas Patocka DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); 243748debafeSMikulas Patocka break; 243848debafeSMikulas Patocka } 243948debafeSMikulas Patocka } 244048debafeSMikulas Patocka 244148debafeSMikulas Patocka static struct target_type writecache_target = { 244248debafeSMikulas Patocka .name = "writecache", 244393de44ebSMikulas Patocka .version = {1, 3, 0}, 244448debafeSMikulas Patocka .module = THIS_MODULE, 244548debafeSMikulas Patocka .ctr = writecache_ctr, 244648debafeSMikulas Patocka .dtr = writecache_dtr, 244748debafeSMikulas Patocka .status = writecache_status, 244848debafeSMikulas Patocka .postsuspend = writecache_suspend, 244948debafeSMikulas Patocka .resume = writecache_resume, 245048debafeSMikulas Patocka .message = writecache_message, 245148debafeSMikulas Patocka .map = writecache_map, 245248debafeSMikulas Patocka .end_io = writecache_end_io, 245348debafeSMikulas Patocka .iterate_devices = writecache_iterate_devices, 245448debafeSMikulas Patocka .io_hints = writecache_io_hints, 245548debafeSMikulas Patocka }; 245648debafeSMikulas Patocka 245748debafeSMikulas Patocka static int __init dm_writecache_init(void) 245848debafeSMikulas Patocka { 245948debafeSMikulas Patocka int r; 246048debafeSMikulas Patocka 246148debafeSMikulas Patocka r = dm_register_target(&writecache_target); 246248debafeSMikulas Patocka if (r < 0) { 246348debafeSMikulas Patocka DMERR("register failed %d", r); 246448debafeSMikulas Patocka return r; 246548debafeSMikulas Patocka } 246648debafeSMikulas Patocka 246748debafeSMikulas Patocka return 0; 246848debafeSMikulas Patocka } 246948debafeSMikulas Patocka 247048debafeSMikulas Patocka static void __exit dm_writecache_exit(void) 247148debafeSMikulas Patocka { 247248debafeSMikulas Patocka dm_unregister_target(&writecache_target); 247348debafeSMikulas Patocka } 247448debafeSMikulas Patocka 247548debafeSMikulas Patocka module_init(dm_writecache_init); 247648debafeSMikulas Patocka module_exit(dm_writecache_exit); 247748debafeSMikulas Patocka 247848debafeSMikulas Patocka MODULE_DESCRIPTION(DM_NAME " writecache target"); 247948debafeSMikulas Patocka MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); 248048debafeSMikulas Patocka MODULE_LICENSE("GPL"); 2481