148debafeSMikulas Patocka // SPDX-License-Identifier: GPL-2.0 248debafeSMikulas Patocka /* 348debafeSMikulas Patocka * Copyright (C) 2018 Red Hat. All rights reserved. 448debafeSMikulas Patocka * 548debafeSMikulas Patocka * This file is released under the GPL. 648debafeSMikulas Patocka */ 748debafeSMikulas Patocka 848debafeSMikulas Patocka #include <linux/device-mapper.h> 948debafeSMikulas Patocka #include <linux/module.h> 1048debafeSMikulas Patocka #include <linux/init.h> 1148debafeSMikulas Patocka #include <linux/vmalloc.h> 1248debafeSMikulas Patocka #include <linux/kthread.h> 1348debafeSMikulas Patocka #include <linux/dm-io.h> 1448debafeSMikulas Patocka #include <linux/dm-kcopyd.h> 1548debafeSMikulas Patocka #include <linux/dax.h> 1648debafeSMikulas Patocka #include <linux/pfn_t.h> 1748debafeSMikulas Patocka #include <linux/libnvdimm.h> 1848debafeSMikulas Patocka 1948debafeSMikulas Patocka #define DM_MSG_PREFIX "writecache" 2048debafeSMikulas Patocka 2148debafeSMikulas Patocka #define HIGH_WATERMARK 50 2248debafeSMikulas Patocka #define LOW_WATERMARK 45 2348debafeSMikulas Patocka #define MAX_WRITEBACK_JOBS 0 2448debafeSMikulas Patocka #define ENDIO_LATENCY 16 2548debafeSMikulas Patocka #define WRITEBACK_LATENCY 64 2648debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_SSD 65536 2748debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_PMEM 64 2848debafeSMikulas Patocka #define AUTOCOMMIT_MSEC 1000 29*3923d485SMikulas Patocka #define MAX_AGE_DIV 16 30*3923d485SMikulas Patocka #define MAX_AGE_UNSPECIFIED -1UL 3148debafeSMikulas Patocka 3248debafeSMikulas Patocka #define BITMAP_GRANULARITY 65536 3348debafeSMikulas Patocka #if BITMAP_GRANULARITY < PAGE_SIZE 3448debafeSMikulas Patocka #undef BITMAP_GRANULARITY 3548debafeSMikulas Patocka #define BITMAP_GRANULARITY PAGE_SIZE 3648debafeSMikulas Patocka #endif 3748debafeSMikulas Patocka 3848debafeSMikulas Patocka #if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER) 3948debafeSMikulas Patocka #define 
DM_WRITECACHE_HAS_PMEM 4048debafeSMikulas Patocka #endif 4148debafeSMikulas Patocka 4248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 4348debafeSMikulas Patocka #define pmem_assign(dest, src) \ 4448debafeSMikulas Patocka do { \ 4548debafeSMikulas Patocka typeof(dest) uniq = (src); \ 4648debafeSMikulas Patocka memcpy_flushcache(&(dest), &uniq, sizeof(dest)); \ 4748debafeSMikulas Patocka } while (0) 4848debafeSMikulas Patocka #else 4948debafeSMikulas Patocka #define pmem_assign(dest, src) ((dest) = (src)) 5048debafeSMikulas Patocka #endif 5148debafeSMikulas Patocka 5248debafeSMikulas Patocka #if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) 5348debafeSMikulas Patocka #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 5448debafeSMikulas Patocka #endif 5548debafeSMikulas Patocka 5648debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_MAGIC 0x23489321 5748debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_VERSION 1 5848debafeSMikulas Patocka 5948debafeSMikulas Patocka struct wc_memory_entry { 6048debafeSMikulas Patocka __le64 original_sector; 6148debafeSMikulas Patocka __le64 seq_count; 6248debafeSMikulas Patocka }; 6348debafeSMikulas Patocka 6448debafeSMikulas Patocka struct wc_memory_superblock { 6548debafeSMikulas Patocka union { 6648debafeSMikulas Patocka struct { 6748debafeSMikulas Patocka __le32 magic; 6848debafeSMikulas Patocka __le32 version; 6948debafeSMikulas Patocka __le32 block_size; 7048debafeSMikulas Patocka __le32 pad; 7148debafeSMikulas Patocka __le64 n_blocks; 7248debafeSMikulas Patocka __le64 seq_count; 7348debafeSMikulas Patocka }; 7448debafeSMikulas Patocka __le64 padding[8]; 7548debafeSMikulas Patocka }; 7648debafeSMikulas Patocka struct wc_memory_entry entries[0]; 7748debafeSMikulas Patocka }; 7848debafeSMikulas Patocka 7948debafeSMikulas Patocka struct wc_entry { 8048debafeSMikulas Patocka struct rb_node rb_node; 8148debafeSMikulas Patocka struct list_head lru; 8248debafeSMikulas Patocka unsigned short wc_list_contiguous; 
8348debafeSMikulas Patocka bool write_in_progress 8448debafeSMikulas Patocka #if BITS_PER_LONG == 64 8548debafeSMikulas Patocka :1 8648debafeSMikulas Patocka #endif 8748debafeSMikulas Patocka ; 8848debafeSMikulas Patocka unsigned long index 8948debafeSMikulas Patocka #if BITS_PER_LONG == 64 9048debafeSMikulas Patocka :47 9148debafeSMikulas Patocka #endif 9248debafeSMikulas Patocka ; 93*3923d485SMikulas Patocka unsigned long age; 9448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 9548debafeSMikulas Patocka uint64_t original_sector; 9648debafeSMikulas Patocka uint64_t seq_count; 9748debafeSMikulas Patocka #endif 9848debafeSMikulas Patocka }; 9948debafeSMikulas Patocka 10048debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 10148debafeSMikulas Patocka #define WC_MODE_PMEM(wc) ((wc)->pmem_mode) 10248debafeSMikulas Patocka #define WC_MODE_FUA(wc) ((wc)->writeback_fua) 10348debafeSMikulas Patocka #else 10448debafeSMikulas Patocka #define WC_MODE_PMEM(wc) false 10548debafeSMikulas Patocka #define WC_MODE_FUA(wc) false 10648debafeSMikulas Patocka #endif 10748debafeSMikulas Patocka #define WC_MODE_SORT_FREELIST(wc) (!WC_MODE_PMEM(wc)) 10848debafeSMikulas Patocka 10948debafeSMikulas Patocka struct dm_writecache { 11048debafeSMikulas Patocka struct mutex lock; 11148debafeSMikulas Patocka struct list_head lru; 11248debafeSMikulas Patocka union { 11348debafeSMikulas Patocka struct list_head freelist; 11448debafeSMikulas Patocka struct { 11548debafeSMikulas Patocka struct rb_root freetree; 11648debafeSMikulas Patocka struct wc_entry *current_free; 11748debafeSMikulas Patocka }; 11848debafeSMikulas Patocka }; 11948debafeSMikulas Patocka struct rb_root tree; 12048debafeSMikulas Patocka 12148debafeSMikulas Patocka size_t freelist_size; 12248debafeSMikulas Patocka size_t writeback_size; 12348debafeSMikulas Patocka size_t freelist_high_watermark; 12448debafeSMikulas Patocka size_t freelist_low_watermark; 125*3923d485SMikulas Patocka unsigned long max_age; 
12648debafeSMikulas Patocka 12748debafeSMikulas Patocka unsigned uncommitted_blocks; 12848debafeSMikulas Patocka unsigned autocommit_blocks; 12948debafeSMikulas Patocka unsigned max_writeback_jobs; 13048debafeSMikulas Patocka 13148debafeSMikulas Patocka int error; 13248debafeSMikulas Patocka 13348debafeSMikulas Patocka unsigned long autocommit_jiffies; 13448debafeSMikulas Patocka struct timer_list autocommit_timer; 13548debafeSMikulas Patocka struct wait_queue_head freelist_wait; 13648debafeSMikulas Patocka 137*3923d485SMikulas Patocka struct timer_list max_age_timer; 138*3923d485SMikulas Patocka 13948debafeSMikulas Patocka atomic_t bio_in_progress[2]; 14048debafeSMikulas Patocka struct wait_queue_head bio_in_progress_wait[2]; 14148debafeSMikulas Patocka 14248debafeSMikulas Patocka struct dm_target *ti; 14348debafeSMikulas Patocka struct dm_dev *dev; 14448debafeSMikulas Patocka struct dm_dev *ssd_dev; 145d284f824SMikulas Patocka sector_t start_sector; 14648debafeSMikulas Patocka void *memory_map; 14748debafeSMikulas Patocka uint64_t memory_map_size; 14848debafeSMikulas Patocka size_t metadata_sectors; 14948debafeSMikulas Patocka size_t n_blocks; 15048debafeSMikulas Patocka uint64_t seq_count; 15148debafeSMikulas Patocka void *block_start; 15248debafeSMikulas Patocka struct wc_entry *entries; 15348debafeSMikulas Patocka unsigned block_size; 15448debafeSMikulas Patocka unsigned char block_size_bits; 15548debafeSMikulas Patocka 15648debafeSMikulas Patocka bool pmem_mode:1; 15748debafeSMikulas Patocka bool writeback_fua:1; 15848debafeSMikulas Patocka 15948debafeSMikulas Patocka bool overwrote_committed:1; 16048debafeSMikulas Patocka bool memory_vmapped:1; 16148debafeSMikulas Patocka 16248debafeSMikulas Patocka bool high_wm_percent_set:1; 16348debafeSMikulas Patocka bool low_wm_percent_set:1; 16448debafeSMikulas Patocka bool max_writeback_jobs_set:1; 16548debafeSMikulas Patocka bool autocommit_blocks_set:1; 16648debafeSMikulas Patocka bool autocommit_time_set:1; 
16748debafeSMikulas Patocka bool writeback_fua_set:1; 16848debafeSMikulas Patocka bool flush_on_suspend:1; 16993de44ebSMikulas Patocka bool cleaner:1; 17048debafeSMikulas Patocka 17148debafeSMikulas Patocka unsigned writeback_all; 17248debafeSMikulas Patocka struct workqueue_struct *writeback_wq; 17348debafeSMikulas Patocka struct work_struct writeback_work; 17448debafeSMikulas Patocka struct work_struct flush_work; 17548debafeSMikulas Patocka 17648debafeSMikulas Patocka struct dm_io_client *dm_io; 17748debafeSMikulas Patocka 17848debafeSMikulas Patocka raw_spinlock_t endio_list_lock; 17948debafeSMikulas Patocka struct list_head endio_list; 18048debafeSMikulas Patocka struct task_struct *endio_thread; 18148debafeSMikulas Patocka 18248debafeSMikulas Patocka struct task_struct *flush_thread; 18348debafeSMikulas Patocka struct bio_list flush_list; 18448debafeSMikulas Patocka 18548debafeSMikulas Patocka struct dm_kcopyd_client *dm_kcopyd; 18648debafeSMikulas Patocka unsigned long *dirty_bitmap; 18748debafeSMikulas Patocka unsigned dirty_bitmap_size; 18848debafeSMikulas Patocka 18948debafeSMikulas Patocka struct bio_set bio_set; 19048debafeSMikulas Patocka mempool_t copy_pool; 19148debafeSMikulas Patocka }; 19248debafeSMikulas Patocka 19348debafeSMikulas Patocka #define WB_LIST_INLINE 16 19448debafeSMikulas Patocka 19548debafeSMikulas Patocka struct writeback_struct { 19648debafeSMikulas Patocka struct list_head endio_entry; 19748debafeSMikulas Patocka struct dm_writecache *wc; 19848debafeSMikulas Patocka struct wc_entry **wc_list; 19948debafeSMikulas Patocka unsigned wc_list_n; 20048debafeSMikulas Patocka struct wc_entry *wc_list_inline[WB_LIST_INLINE]; 20148debafeSMikulas Patocka struct bio bio; 20248debafeSMikulas Patocka }; 20348debafeSMikulas Patocka 20448debafeSMikulas Patocka struct copy_struct { 20548debafeSMikulas Patocka struct list_head endio_entry; 20648debafeSMikulas Patocka struct dm_writecache *wc; 20748debafeSMikulas Patocka struct wc_entry *e; 
20848debafeSMikulas Patocka unsigned n_entries; 20948debafeSMikulas Patocka int error; 21048debafeSMikulas Patocka }; 21148debafeSMikulas Patocka 21248debafeSMikulas Patocka DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle, 21348debafeSMikulas Patocka "A percentage of time allocated for data copying"); 21448debafeSMikulas Patocka 21548debafeSMikulas Patocka static void wc_lock(struct dm_writecache *wc) 21648debafeSMikulas Patocka { 21748debafeSMikulas Patocka mutex_lock(&wc->lock); 21848debafeSMikulas Patocka } 21948debafeSMikulas Patocka 22048debafeSMikulas Patocka static void wc_unlock(struct dm_writecache *wc) 22148debafeSMikulas Patocka { 22248debafeSMikulas Patocka mutex_unlock(&wc->lock); 22348debafeSMikulas Patocka } 22448debafeSMikulas Patocka 22548debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 22648debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 22748debafeSMikulas Patocka { 22848debafeSMikulas Patocka int r; 22948debafeSMikulas Patocka loff_t s; 23048debafeSMikulas Patocka long p, da; 23148debafeSMikulas Patocka pfn_t pfn; 23248debafeSMikulas Patocka int id; 23348debafeSMikulas Patocka struct page **pages; 23448debafeSMikulas Patocka 23548debafeSMikulas Patocka wc->memory_vmapped = false; 23648debafeSMikulas Patocka 23748debafeSMikulas Patocka if (!wc->ssd_dev->dax_dev) { 23848debafeSMikulas Patocka r = -EOPNOTSUPP; 23948debafeSMikulas Patocka goto err1; 24048debafeSMikulas Patocka } 24148debafeSMikulas Patocka s = wc->memory_map_size; 24248debafeSMikulas Patocka p = s >> PAGE_SHIFT; 24348debafeSMikulas Patocka if (!p) { 24448debafeSMikulas Patocka r = -EINVAL; 24548debafeSMikulas Patocka goto err1; 24648debafeSMikulas Patocka } 24748debafeSMikulas Patocka if (p != s >> PAGE_SHIFT) { 24848debafeSMikulas Patocka r = -EOVERFLOW; 24948debafeSMikulas Patocka goto err1; 25048debafeSMikulas Patocka } 25148debafeSMikulas Patocka 25248debafeSMikulas Patocka id = dax_read_lock(); 25348debafeSMikulas 
Patocka 25448debafeSMikulas Patocka da = dax_direct_access(wc->ssd_dev->dax_dev, 0, p, &wc->memory_map, &pfn); 25548debafeSMikulas Patocka if (da < 0) { 25648debafeSMikulas Patocka wc->memory_map = NULL; 25748debafeSMikulas Patocka r = da; 25848debafeSMikulas Patocka goto err2; 25948debafeSMikulas Patocka } 26048debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 26148debafeSMikulas Patocka wc->memory_map = NULL; 26248debafeSMikulas Patocka r = -EOPNOTSUPP; 26348debafeSMikulas Patocka goto err2; 26448debafeSMikulas Patocka } 26548debafeSMikulas Patocka if (da != p) { 26648debafeSMikulas Patocka long i; 26748debafeSMikulas Patocka wc->memory_map = NULL; 26850a7d3baSKees Cook pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); 26948debafeSMikulas Patocka if (!pages) { 27048debafeSMikulas Patocka r = -ENOMEM; 27148debafeSMikulas Patocka goto err2; 27248debafeSMikulas Patocka } 27348debafeSMikulas Patocka i = 0; 27448debafeSMikulas Patocka do { 27548debafeSMikulas Patocka long daa; 27648debafeSMikulas Patocka daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i, 277f742267aSHuaisheng Ye NULL, &pfn); 27848debafeSMikulas Patocka if (daa <= 0) { 27948debafeSMikulas Patocka r = daa ? 
daa : -EINVAL; 28048debafeSMikulas Patocka goto err3; 28148debafeSMikulas Patocka } 28248debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 28348debafeSMikulas Patocka r = -EOPNOTSUPP; 28448debafeSMikulas Patocka goto err3; 28548debafeSMikulas Patocka } 28648debafeSMikulas Patocka while (daa-- && i < p) { 28748debafeSMikulas Patocka pages[i++] = pfn_t_to_page(pfn); 28848debafeSMikulas Patocka pfn.val++; 28948debafeSMikulas Patocka } 29048debafeSMikulas Patocka } while (i < p); 29148debafeSMikulas Patocka wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); 29248debafeSMikulas Patocka if (!wc->memory_map) { 29348debafeSMikulas Patocka r = -ENOMEM; 29448debafeSMikulas Patocka goto err3; 29548debafeSMikulas Patocka } 29648debafeSMikulas Patocka kvfree(pages); 29748debafeSMikulas Patocka wc->memory_vmapped = true; 29848debafeSMikulas Patocka } 29948debafeSMikulas Patocka 30048debafeSMikulas Patocka dax_read_unlock(id); 301d284f824SMikulas Patocka 302d284f824SMikulas Patocka wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT; 303d284f824SMikulas Patocka wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT; 304d284f824SMikulas Patocka 30548debafeSMikulas Patocka return 0; 30648debafeSMikulas Patocka err3: 30748debafeSMikulas Patocka kvfree(pages); 30848debafeSMikulas Patocka err2: 30948debafeSMikulas Patocka dax_read_unlock(id); 31048debafeSMikulas Patocka err1: 31148debafeSMikulas Patocka return r; 31248debafeSMikulas Patocka } 31348debafeSMikulas Patocka #else 31448debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 31548debafeSMikulas Patocka { 31648debafeSMikulas Patocka BUG(); 31748debafeSMikulas Patocka } 31848debafeSMikulas Patocka #endif 31948debafeSMikulas Patocka 32048debafeSMikulas Patocka static void persistent_memory_release(struct dm_writecache *wc) 32148debafeSMikulas Patocka { 32248debafeSMikulas Patocka if (wc->memory_vmapped) 323d284f824SMikulas Patocka vunmap(wc->memory_map - ((size_t)wc->start_sector 
<< SECTOR_SHIFT)); 32448debafeSMikulas Patocka } 32548debafeSMikulas Patocka 32648debafeSMikulas Patocka static struct page *persistent_memory_page(void *addr) 32748debafeSMikulas Patocka { 32848debafeSMikulas Patocka if (is_vmalloc_addr(addr)) 32948debafeSMikulas Patocka return vmalloc_to_page(addr); 33048debafeSMikulas Patocka else 33148debafeSMikulas Patocka return virt_to_page(addr); 33248debafeSMikulas Patocka } 33348debafeSMikulas Patocka 33448debafeSMikulas Patocka static unsigned persistent_memory_page_offset(void *addr) 33548debafeSMikulas Patocka { 33648debafeSMikulas Patocka return (unsigned long)addr & (PAGE_SIZE - 1); 33748debafeSMikulas Patocka } 33848debafeSMikulas Patocka 33948debafeSMikulas Patocka static void persistent_memory_flush_cache(void *ptr, size_t size) 34048debafeSMikulas Patocka { 34148debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 34248debafeSMikulas Patocka flush_kernel_vmap_range(ptr, size); 34348debafeSMikulas Patocka } 34448debafeSMikulas Patocka 34548debafeSMikulas Patocka static void persistent_memory_invalidate_cache(void *ptr, size_t size) 34648debafeSMikulas Patocka { 34748debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 34848debafeSMikulas Patocka invalidate_kernel_vmap_range(ptr, size); 34948debafeSMikulas Patocka } 35048debafeSMikulas Patocka 35148debafeSMikulas Patocka static struct wc_memory_superblock *sb(struct dm_writecache *wc) 35248debafeSMikulas Patocka { 35348debafeSMikulas Patocka return wc->memory_map; 35448debafeSMikulas Patocka } 35548debafeSMikulas Patocka 35648debafeSMikulas Patocka static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e) 35748debafeSMikulas Patocka { 35848debafeSMikulas Patocka return &sb(wc)->entries[e->index]; 35948debafeSMikulas Patocka } 36048debafeSMikulas Patocka 36148debafeSMikulas Patocka static void *memory_data(struct dm_writecache *wc, struct wc_entry *e) 36248debafeSMikulas Patocka { 36348debafeSMikulas Patocka return (char *)wc->block_start + 
(e->index << wc->block_size_bits); 36448debafeSMikulas Patocka } 36548debafeSMikulas Patocka 36648debafeSMikulas Patocka static sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e) 36748debafeSMikulas Patocka { 368d284f824SMikulas Patocka return wc->start_sector + wc->metadata_sectors + 36948debafeSMikulas Patocka ((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT)); 37048debafeSMikulas Patocka } 37148debafeSMikulas Patocka 37248debafeSMikulas Patocka static uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e) 37348debafeSMikulas Patocka { 37448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 37548debafeSMikulas Patocka return e->original_sector; 37648debafeSMikulas Patocka #else 37748debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->original_sector); 37848debafeSMikulas Patocka #endif 37948debafeSMikulas Patocka } 38048debafeSMikulas Patocka 38148debafeSMikulas Patocka static uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e) 38248debafeSMikulas Patocka { 38348debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 38448debafeSMikulas Patocka return e->seq_count; 38548debafeSMikulas Patocka #else 38648debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->seq_count); 38748debafeSMikulas Patocka #endif 38848debafeSMikulas Patocka } 38948debafeSMikulas Patocka 39048debafeSMikulas Patocka static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e) 39148debafeSMikulas Patocka { 39248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 39348debafeSMikulas Patocka e->seq_count = -1; 39448debafeSMikulas Patocka #endif 39548debafeSMikulas Patocka pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1)); 39648debafeSMikulas Patocka } 39748debafeSMikulas Patocka 39848debafeSMikulas Patocka static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e, 39948debafeSMikulas Patocka uint64_t 
original_sector, uint64_t seq_count) 40048debafeSMikulas Patocka { 40148debafeSMikulas Patocka struct wc_memory_entry me; 40248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 40348debafeSMikulas Patocka e->original_sector = original_sector; 40448debafeSMikulas Patocka e->seq_count = seq_count; 40548debafeSMikulas Patocka #endif 40648debafeSMikulas Patocka me.original_sector = cpu_to_le64(original_sector); 40748debafeSMikulas Patocka me.seq_count = cpu_to_le64(seq_count); 40848debafeSMikulas Patocka pmem_assign(*memory_entry(wc, e), me); 40948debafeSMikulas Patocka } 41048debafeSMikulas Patocka 41148debafeSMikulas Patocka #define writecache_error(wc, err, msg, arg...) \ 41248debafeSMikulas Patocka do { \ 41348debafeSMikulas Patocka if (!cmpxchg(&(wc)->error, 0, err)) \ 41448debafeSMikulas Patocka DMERR(msg, ##arg); \ 41548debafeSMikulas Patocka wake_up(&(wc)->freelist_wait); \ 41648debafeSMikulas Patocka } while (0) 41748debafeSMikulas Patocka 41848debafeSMikulas Patocka #define writecache_has_error(wc) (unlikely(READ_ONCE((wc)->error))) 41948debafeSMikulas Patocka 42048debafeSMikulas Patocka static void writecache_flush_all_metadata(struct dm_writecache *wc) 42148debafeSMikulas Patocka { 42248debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42348debafeSMikulas Patocka memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size); 42448debafeSMikulas Patocka } 42548debafeSMikulas Patocka 42648debafeSMikulas Patocka static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size) 42748debafeSMikulas Patocka { 42848debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42948debafeSMikulas Patocka __set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY, 43048debafeSMikulas Patocka wc->dirty_bitmap); 43148debafeSMikulas Patocka } 43248debafeSMikulas Patocka 43348debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev); 43448debafeSMikulas Patocka 43548debafeSMikulas Patocka struct io_notify { 
43648debafeSMikulas Patocka struct dm_writecache *wc; 43748debafeSMikulas Patocka struct completion c; 43848debafeSMikulas Patocka atomic_t count; 43948debafeSMikulas Patocka }; 44048debafeSMikulas Patocka 44148debafeSMikulas Patocka static void writecache_notify_io(unsigned long error, void *context) 44248debafeSMikulas Patocka { 44348debafeSMikulas Patocka struct io_notify *endio = context; 44448debafeSMikulas Patocka 44548debafeSMikulas Patocka if (unlikely(error != 0)) 44648debafeSMikulas Patocka writecache_error(endio->wc, -EIO, "error writing metadata"); 44748debafeSMikulas Patocka BUG_ON(atomic_read(&endio->count) <= 0); 44848debafeSMikulas Patocka if (atomic_dec_and_test(&endio->count)) 44948debafeSMikulas Patocka complete(&endio->c); 45048debafeSMikulas Patocka } 45148debafeSMikulas Patocka 452aa950920SMikulas Patocka static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) 453aa950920SMikulas Patocka { 454aa950920SMikulas Patocka wait_event(wc->bio_in_progress_wait[direction], 455aa950920SMikulas Patocka !atomic_read(&wc->bio_in_progress[direction])); 456aa950920SMikulas Patocka } 457aa950920SMikulas Patocka 458aa950920SMikulas Patocka static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 45948debafeSMikulas Patocka { 46048debafeSMikulas Patocka struct dm_io_region region; 46148debafeSMikulas Patocka struct dm_io_request req; 46248debafeSMikulas Patocka struct io_notify endio = { 46348debafeSMikulas Patocka wc, 46448debafeSMikulas Patocka COMPLETION_INITIALIZER_ONSTACK(endio.c), 46548debafeSMikulas Patocka ATOMIC_INIT(1), 46648debafeSMikulas Patocka }; 4671e1132eaSMikulas Patocka unsigned bitmap_bits = wc->dirty_bitmap_size * 8; 46848debafeSMikulas Patocka unsigned i = 0; 46948debafeSMikulas Patocka 47048debafeSMikulas Patocka while (1) { 47148debafeSMikulas Patocka unsigned j; 47248debafeSMikulas Patocka i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i); 47348debafeSMikulas Patocka if (unlikely(i == 
bitmap_bits)) 47448debafeSMikulas Patocka break; 47548debafeSMikulas Patocka j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i); 47648debafeSMikulas Patocka 47748debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 47848debafeSMikulas Patocka region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 47948debafeSMikulas Patocka region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 48048debafeSMikulas Patocka 48148debafeSMikulas Patocka if (unlikely(region.sector >= wc->metadata_sectors)) 48248debafeSMikulas Patocka break; 48348debafeSMikulas Patocka if (unlikely(region.sector + region.count > wc->metadata_sectors)) 48448debafeSMikulas Patocka region.count = wc->metadata_sectors - region.sector; 48548debafeSMikulas Patocka 486d284f824SMikulas Patocka region.sector += wc->start_sector; 48748debafeSMikulas Patocka atomic_inc(&endio.count); 48848debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 48948debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 49048debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 49148debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; 49248debafeSMikulas Patocka req.client = wc->dm_io; 49348debafeSMikulas Patocka req.notify.fn = writecache_notify_io; 49448debafeSMikulas Patocka req.notify.context = &endio; 49548debafeSMikulas Patocka 49648debafeSMikulas Patocka /* writing via async dm-io (implied by notify.fn above) won't return an error */ 49748debafeSMikulas Patocka (void) dm_io(&req, 1, ®ion, NULL); 49848debafeSMikulas Patocka i = j; 49948debafeSMikulas Patocka } 50048debafeSMikulas Patocka 50148debafeSMikulas Patocka writecache_notify_io(0, &endio); 50248debafeSMikulas Patocka wait_for_completion_io(&endio.c); 50348debafeSMikulas Patocka 504aa950920SMikulas Patocka if (wait_for_ios) 505aa950920SMikulas Patocka writecache_wait_for_ios(wc, WRITE); 506aa950920SMikulas Patocka 50748debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 50848debafeSMikulas 
Patocka 50948debafeSMikulas Patocka memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); 51048debafeSMikulas Patocka } 51148debafeSMikulas Patocka 512aa950920SMikulas Patocka static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 51348debafeSMikulas Patocka { 51448debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 51548debafeSMikulas Patocka wmb(); 51648debafeSMikulas Patocka else 517aa950920SMikulas Patocka ssd_commit_flushed(wc, wait_for_ios); 51848debafeSMikulas Patocka } 51948debafeSMikulas Patocka 52048debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) 52148debafeSMikulas Patocka { 52248debafeSMikulas Patocka int r; 52348debafeSMikulas Patocka struct dm_io_region region; 52448debafeSMikulas Patocka struct dm_io_request req; 52548debafeSMikulas Patocka 52648debafeSMikulas Patocka region.bdev = dev->bdev; 52748debafeSMikulas Patocka region.sector = 0; 52848debafeSMikulas Patocka region.count = 0; 52948debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 53048debafeSMikulas Patocka req.bi_op_flags = REQ_PREFLUSH; 53148debafeSMikulas Patocka req.mem.type = DM_IO_KMEM; 53248debafeSMikulas Patocka req.mem.ptr.addr = NULL; 53348debafeSMikulas Patocka req.client = wc->dm_io; 53448debafeSMikulas Patocka req.notify.fn = NULL; 53548debafeSMikulas Patocka 53648debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 53748debafeSMikulas Patocka if (unlikely(r)) 53848debafeSMikulas Patocka writecache_error(wc, r, "error flushing metadata: %d", r); 53948debafeSMikulas Patocka } 54048debafeSMikulas Patocka 54148debafeSMikulas Patocka #define WFE_RETURN_FOLLOWING 1 54248debafeSMikulas Patocka #define WFE_LOWEST_SEQ 2 54348debafeSMikulas Patocka 54448debafeSMikulas Patocka static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, 54548debafeSMikulas Patocka uint64_t block, int flags) 54648debafeSMikulas Patocka { 54748debafeSMikulas Patocka struct wc_entry *e; 54848debafeSMikulas Patocka struct 
rb_node *node = wc->tree.rb_node; 54948debafeSMikulas Patocka 55048debafeSMikulas Patocka if (unlikely(!node)) 55148debafeSMikulas Patocka return NULL; 55248debafeSMikulas Patocka 55348debafeSMikulas Patocka while (1) { 55448debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 55548debafeSMikulas Patocka if (read_original_sector(wc, e) == block) 55648debafeSMikulas Patocka break; 557f8011d33SMikulas Patocka 55848debafeSMikulas Patocka node = (read_original_sector(wc, e) >= block ? 55948debafeSMikulas Patocka e->rb_node.rb_left : e->rb_node.rb_right); 56048debafeSMikulas Patocka if (unlikely(!node)) { 561f8011d33SMikulas Patocka if (!(flags & WFE_RETURN_FOLLOWING)) 56248debafeSMikulas Patocka return NULL; 56348debafeSMikulas Patocka if (read_original_sector(wc, e) >= block) { 564f8011d33SMikulas Patocka return e; 56548debafeSMikulas Patocka } else { 56648debafeSMikulas Patocka node = rb_next(&e->rb_node); 567f8011d33SMikulas Patocka if (unlikely(!node)) 56848debafeSMikulas Patocka return NULL; 56948debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 570f8011d33SMikulas Patocka return e; 57148debafeSMikulas Patocka } 57248debafeSMikulas Patocka } 57348debafeSMikulas Patocka } 57448debafeSMikulas Patocka 57548debafeSMikulas Patocka while (1) { 57648debafeSMikulas Patocka struct wc_entry *e2; 57748debafeSMikulas Patocka if (flags & WFE_LOWEST_SEQ) 57848debafeSMikulas Patocka node = rb_prev(&e->rb_node); 57948debafeSMikulas Patocka else 58048debafeSMikulas Patocka node = rb_next(&e->rb_node); 58184420b1eSHuaisheng Ye if (unlikely(!node)) 58248debafeSMikulas Patocka return e; 58348debafeSMikulas Patocka e2 = container_of(node, struct wc_entry, rb_node); 58448debafeSMikulas Patocka if (read_original_sector(wc, e2) != block) 58548debafeSMikulas Patocka return e; 58648debafeSMikulas Patocka e = e2; 58748debafeSMikulas Patocka } 58848debafeSMikulas Patocka } 58948debafeSMikulas Patocka 59048debafeSMikulas Patocka static void 
writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins) 59148debafeSMikulas Patocka { 59248debafeSMikulas Patocka struct wc_entry *e; 59348debafeSMikulas Patocka struct rb_node **node = &wc->tree.rb_node, *parent = NULL; 59448debafeSMikulas Patocka 59548debafeSMikulas Patocka while (*node) { 59648debafeSMikulas Patocka e = container_of(*node, struct wc_entry, rb_node); 59748debafeSMikulas Patocka parent = &e->rb_node; 59848debafeSMikulas Patocka if (read_original_sector(wc, e) > read_original_sector(wc, ins)) 59948debafeSMikulas Patocka node = &parent->rb_left; 60048debafeSMikulas Patocka else 60148debafeSMikulas Patocka node = &parent->rb_right; 60248debafeSMikulas Patocka } 60348debafeSMikulas Patocka rb_link_node(&ins->rb_node, parent, node); 60448debafeSMikulas Patocka rb_insert_color(&ins->rb_node, &wc->tree); 60548debafeSMikulas Patocka list_add(&ins->lru, &wc->lru); 606*3923d485SMikulas Patocka ins->age = jiffies; 60748debafeSMikulas Patocka } 60848debafeSMikulas Patocka 60948debafeSMikulas Patocka static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e) 61048debafeSMikulas Patocka { 61148debafeSMikulas Patocka list_del(&e->lru); 61248debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->tree); 61348debafeSMikulas Patocka } 61448debafeSMikulas Patocka 61548debafeSMikulas Patocka static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e) 61648debafeSMikulas Patocka { 61748debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 61848debafeSMikulas Patocka struct rb_node **node = &wc->freetree.rb_node, *parent = NULL; 61948debafeSMikulas Patocka if (unlikely(!*node)) 62048debafeSMikulas Patocka wc->current_free = e; 62148debafeSMikulas Patocka while (*node) { 62248debafeSMikulas Patocka parent = *node; 62348debafeSMikulas Patocka if (&e->rb_node < *node) 62448debafeSMikulas Patocka node = &parent->rb_left; 62548debafeSMikulas Patocka else 62648debafeSMikulas Patocka node = &parent->rb_right; 
62748debafeSMikulas Patocka } 62848debafeSMikulas Patocka rb_link_node(&e->rb_node, parent, node); 62948debafeSMikulas Patocka rb_insert_color(&e->rb_node, &wc->freetree); 63048debafeSMikulas Patocka } else { 63148debafeSMikulas Patocka list_add_tail(&e->lru, &wc->freelist); 63248debafeSMikulas Patocka } 63348debafeSMikulas Patocka wc->freelist_size++; 63448debafeSMikulas Patocka } 63548debafeSMikulas Patocka 63641c526c5SMikulas Patocka static inline void writecache_verify_watermark(struct dm_writecache *wc) 63741c526c5SMikulas Patocka { 63841c526c5SMikulas Patocka if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) 63941c526c5SMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 64041c526c5SMikulas Patocka } 64141c526c5SMikulas Patocka 642*3923d485SMikulas Patocka static void writecache_max_age_timer(struct timer_list *t) 643*3923d485SMikulas Patocka { 644*3923d485SMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, max_age_timer); 645*3923d485SMikulas Patocka 646*3923d485SMikulas Patocka if (!dm_suspended(wc->ti) && !writecache_has_error(wc)) { 647*3923d485SMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 648*3923d485SMikulas Patocka mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 649*3923d485SMikulas Patocka } 650*3923d485SMikulas Patocka } 651*3923d485SMikulas Patocka 652dcd19507SMikulas Patocka static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) 65348debafeSMikulas Patocka { 65448debafeSMikulas Patocka struct wc_entry *e; 65548debafeSMikulas Patocka 65648debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 65748debafeSMikulas Patocka struct rb_node *next; 65848debafeSMikulas Patocka if (unlikely(!wc->current_free)) 65948debafeSMikulas Patocka return NULL; 66048debafeSMikulas Patocka e = wc->current_free; 661dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != 
expected_sector)) 662dcd19507SMikulas Patocka return NULL; 66348debafeSMikulas Patocka next = rb_next(&e->rb_node); 66448debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->freetree); 66548debafeSMikulas Patocka if (unlikely(!next)) 66648debafeSMikulas Patocka next = rb_first(&wc->freetree); 66748debafeSMikulas Patocka wc->current_free = next ? container_of(next, struct wc_entry, rb_node) : NULL; 66848debafeSMikulas Patocka } else { 66948debafeSMikulas Patocka if (unlikely(list_empty(&wc->freelist))) 67048debafeSMikulas Patocka return NULL; 67148debafeSMikulas Patocka e = container_of(wc->freelist.next, struct wc_entry, lru); 672dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 673dcd19507SMikulas Patocka return NULL; 67448debafeSMikulas Patocka list_del(&e->lru); 67548debafeSMikulas Patocka } 67648debafeSMikulas Patocka wc->freelist_size--; 67741c526c5SMikulas Patocka 67841c526c5SMikulas Patocka writecache_verify_watermark(wc); 67948debafeSMikulas Patocka 68048debafeSMikulas Patocka return e; 68148debafeSMikulas Patocka } 68248debafeSMikulas Patocka 68348debafeSMikulas Patocka static void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e) 68448debafeSMikulas Patocka { 68548debafeSMikulas Patocka writecache_unlink(wc, e); 68648debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 68748debafeSMikulas Patocka clear_seq_count(wc, e); 68848debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 68948debafeSMikulas Patocka if (unlikely(waitqueue_active(&wc->freelist_wait))) 69048debafeSMikulas Patocka wake_up(&wc->freelist_wait); 69148debafeSMikulas Patocka } 69248debafeSMikulas Patocka 69348debafeSMikulas Patocka static void writecache_wait_on_freelist(struct dm_writecache *wc) 69448debafeSMikulas Patocka { 69548debafeSMikulas Patocka DEFINE_WAIT(wait); 69648debafeSMikulas Patocka 69748debafeSMikulas Patocka 
prepare_to_wait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE); 69848debafeSMikulas Patocka wc_unlock(wc); 69948debafeSMikulas Patocka io_schedule(); 70048debafeSMikulas Patocka finish_wait(&wc->freelist_wait, &wait); 70148debafeSMikulas Patocka wc_lock(wc); 70248debafeSMikulas Patocka } 70348debafeSMikulas Patocka 70448debafeSMikulas Patocka static void writecache_poison_lists(struct dm_writecache *wc) 70548debafeSMikulas Patocka { 70648debafeSMikulas Patocka /* 70748debafeSMikulas Patocka * Catch incorrect access to these values while the device is suspended. 70848debafeSMikulas Patocka */ 70948debafeSMikulas Patocka memset(&wc->tree, -1, sizeof wc->tree); 71048debafeSMikulas Patocka wc->lru.next = LIST_POISON1; 71148debafeSMikulas Patocka wc->lru.prev = LIST_POISON2; 71248debafeSMikulas Patocka wc->freelist.next = LIST_POISON1; 71348debafeSMikulas Patocka wc->freelist.prev = LIST_POISON2; 71448debafeSMikulas Patocka } 71548debafeSMikulas Patocka 71648debafeSMikulas Patocka static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e) 71748debafeSMikulas Patocka { 71848debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 71948debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 72048debafeSMikulas Patocka writecache_flush_region(wc, memory_data(wc, e), wc->block_size); 72148debafeSMikulas Patocka } 72248debafeSMikulas Patocka 72348debafeSMikulas Patocka static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e) 72448debafeSMikulas Patocka { 72548debafeSMikulas Patocka return read_seq_count(wc, e) < wc->seq_count; 72648debafeSMikulas Patocka } 72748debafeSMikulas Patocka 72848debafeSMikulas Patocka static void writecache_flush(struct dm_writecache *wc) 72948debafeSMikulas Patocka { 73048debafeSMikulas Patocka struct wc_entry *e, *e2; 73148debafeSMikulas Patocka bool need_flush_after_free; 73248debafeSMikulas Patocka 73348debafeSMikulas Patocka wc->uncommitted_blocks = 0; 
73448debafeSMikulas Patocka del_timer(&wc->autocommit_timer); 73548debafeSMikulas Patocka 73648debafeSMikulas Patocka if (list_empty(&wc->lru)) 73748debafeSMikulas Patocka return; 73848debafeSMikulas Patocka 73948debafeSMikulas Patocka e = container_of(wc->lru.next, struct wc_entry, lru); 74048debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e)) { 74148debafeSMikulas Patocka if (wc->overwrote_committed) { 74248debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 74348debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 74448debafeSMikulas Patocka wc->overwrote_committed = false; 74548debafeSMikulas Patocka } 74648debafeSMikulas Patocka return; 74748debafeSMikulas Patocka } 74848debafeSMikulas Patocka while (1) { 74948debafeSMikulas Patocka writecache_flush_entry(wc, e); 75048debafeSMikulas Patocka if (unlikely(e->lru.next == &wc->lru)) 75148debafeSMikulas Patocka break; 75248debafeSMikulas Patocka e2 = container_of(e->lru.next, struct wc_entry, lru); 75348debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e2)) 75448debafeSMikulas Patocka break; 75548debafeSMikulas Patocka e = e2; 75648debafeSMikulas Patocka cond_resched(); 75748debafeSMikulas Patocka } 758aa950920SMikulas Patocka writecache_commit_flushed(wc, true); 75948debafeSMikulas Patocka 76048debafeSMikulas Patocka wc->seq_count++; 76148debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); 76248debafeSMikulas Patocka writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); 763aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 76448debafeSMikulas Patocka 76548debafeSMikulas Patocka wc->overwrote_committed = false; 76648debafeSMikulas Patocka 76748debafeSMikulas Patocka need_flush_after_free = false; 76848debafeSMikulas Patocka while (1) { 76948debafeSMikulas Patocka /* Free another committed entry with lower seq-count */ 77048debafeSMikulas Patocka struct rb_node *rb_node = rb_prev(&e->rb_node); 77148debafeSMikulas 
Patocka 77248debafeSMikulas Patocka if (rb_node) { 77348debafeSMikulas Patocka e2 = container_of(rb_node, struct wc_entry, rb_node); 77448debafeSMikulas Patocka if (read_original_sector(wc, e2) == read_original_sector(wc, e) && 77548debafeSMikulas Patocka likely(!e2->write_in_progress)) { 77648debafeSMikulas Patocka writecache_free_entry(wc, e2); 77748debafeSMikulas Patocka need_flush_after_free = true; 77848debafeSMikulas Patocka } 77948debafeSMikulas Patocka } 78048debafeSMikulas Patocka if (unlikely(e->lru.prev == &wc->lru)) 78148debafeSMikulas Patocka break; 78248debafeSMikulas Patocka e = container_of(e->lru.prev, struct wc_entry, lru); 78348debafeSMikulas Patocka cond_resched(); 78448debafeSMikulas Patocka } 78548debafeSMikulas Patocka 78648debafeSMikulas Patocka if (need_flush_after_free) 787aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 78848debafeSMikulas Patocka } 78948debafeSMikulas Patocka 79048debafeSMikulas Patocka static void writecache_flush_work(struct work_struct *work) 79148debafeSMikulas Patocka { 79248debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work); 79348debafeSMikulas Patocka 79448debafeSMikulas Patocka wc_lock(wc); 79548debafeSMikulas Patocka writecache_flush(wc); 79648debafeSMikulas Patocka wc_unlock(wc); 79748debafeSMikulas Patocka } 79848debafeSMikulas Patocka 79948debafeSMikulas Patocka static void writecache_autocommit_timer(struct timer_list *t) 80048debafeSMikulas Patocka { 80148debafeSMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, autocommit_timer); 80248debafeSMikulas Patocka if (!writecache_has_error(wc)) 80348debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 80448debafeSMikulas Patocka } 80548debafeSMikulas Patocka 80648debafeSMikulas Patocka static void writecache_schedule_autocommit(struct dm_writecache *wc) 80748debafeSMikulas Patocka { 80848debafeSMikulas Patocka if (!timer_pending(&wc->autocommit_timer)) 80948debafeSMikulas 
Patocka mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies); 81048debafeSMikulas Patocka } 81148debafeSMikulas Patocka 81248debafeSMikulas Patocka static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end) 81348debafeSMikulas Patocka { 81448debafeSMikulas Patocka struct wc_entry *e; 81548debafeSMikulas Patocka bool discarded_something = false; 81648debafeSMikulas Patocka 81748debafeSMikulas Patocka e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ); 81848debafeSMikulas Patocka if (unlikely(!e)) 81948debafeSMikulas Patocka return; 82048debafeSMikulas Patocka 82148debafeSMikulas Patocka while (read_original_sector(wc, e) < end) { 82248debafeSMikulas Patocka struct rb_node *node = rb_next(&e->rb_node); 82348debafeSMikulas Patocka 82448debafeSMikulas Patocka if (likely(!e->write_in_progress)) { 82548debafeSMikulas Patocka if (!discarded_something) { 82648debafeSMikulas Patocka writecache_wait_for_ios(wc, READ); 82748debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 82848debafeSMikulas Patocka discarded_something = true; 82948debafeSMikulas Patocka } 83048debafeSMikulas Patocka writecache_free_entry(wc, e); 83148debafeSMikulas Patocka } 83248debafeSMikulas Patocka 83384420b1eSHuaisheng Ye if (unlikely(!node)) 83448debafeSMikulas Patocka break; 83548debafeSMikulas Patocka 83648debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 83748debafeSMikulas Patocka } 83848debafeSMikulas Patocka 83948debafeSMikulas Patocka if (discarded_something) 840aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 84148debafeSMikulas Patocka } 84248debafeSMikulas Patocka 84348debafeSMikulas Patocka static bool writecache_wait_for_writeback(struct dm_writecache *wc) 84448debafeSMikulas Patocka { 84548debafeSMikulas Patocka if (wc->writeback_size) { 84648debafeSMikulas Patocka writecache_wait_on_freelist(wc); 84748debafeSMikulas Patocka return true; 84848debafeSMikulas Patocka } 
84948debafeSMikulas Patocka return false; 85048debafeSMikulas Patocka } 85148debafeSMikulas Patocka 85248debafeSMikulas Patocka static void writecache_suspend(struct dm_target *ti) 85348debafeSMikulas Patocka { 85448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 85548debafeSMikulas Patocka bool flush_on_suspend; 85648debafeSMikulas Patocka 85748debafeSMikulas Patocka del_timer_sync(&wc->autocommit_timer); 858*3923d485SMikulas Patocka del_timer_sync(&wc->max_age_timer); 85948debafeSMikulas Patocka 86048debafeSMikulas Patocka wc_lock(wc); 86148debafeSMikulas Patocka writecache_flush(wc); 86248debafeSMikulas Patocka flush_on_suspend = wc->flush_on_suspend; 86348debafeSMikulas Patocka if (flush_on_suspend) { 86448debafeSMikulas Patocka wc->flush_on_suspend = false; 86548debafeSMikulas Patocka wc->writeback_all++; 86648debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 86748debafeSMikulas Patocka } 86848debafeSMikulas Patocka wc_unlock(wc); 86948debafeSMikulas Patocka 870adc0daadSMikulas Patocka drain_workqueue(wc->writeback_wq); 87148debafeSMikulas Patocka 87248debafeSMikulas Patocka wc_lock(wc); 87348debafeSMikulas Patocka if (flush_on_suspend) 87448debafeSMikulas Patocka wc->writeback_all--; 87548debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 87648debafeSMikulas Patocka 87748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 87848debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 87948debafeSMikulas Patocka 88048debafeSMikulas Patocka writecache_poison_lists(wc); 88148debafeSMikulas Patocka 88248debafeSMikulas Patocka wc_unlock(wc); 88348debafeSMikulas Patocka } 88448debafeSMikulas Patocka 88548debafeSMikulas Patocka static int writecache_alloc_entries(struct dm_writecache *wc) 88648debafeSMikulas Patocka { 88748debafeSMikulas Patocka size_t b; 88848debafeSMikulas Patocka 88948debafeSMikulas Patocka if (wc->entries) 89048debafeSMikulas Patocka return 0; 89150a7d3baSKees Cook 
wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); 89248debafeSMikulas Patocka if (!wc->entries) 89348debafeSMikulas Patocka return -ENOMEM; 89448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 89548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 89648debafeSMikulas Patocka e->index = b; 89748debafeSMikulas Patocka e->write_in_progress = false; 89848debafeSMikulas Patocka } 89948debafeSMikulas Patocka 90048debafeSMikulas Patocka return 0; 90148debafeSMikulas Patocka } 90248debafeSMikulas Patocka 90348debafeSMikulas Patocka static void writecache_resume(struct dm_target *ti) 90448debafeSMikulas Patocka { 90548debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 90648debafeSMikulas Patocka size_t b; 90748debafeSMikulas Patocka bool need_flush = false; 90848debafeSMikulas Patocka __le64 sb_seq_count; 90948debafeSMikulas Patocka int r; 91048debafeSMikulas Patocka 91148debafeSMikulas Patocka wc_lock(wc); 91248debafeSMikulas Patocka 91348debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 91448debafeSMikulas Patocka persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); 91548debafeSMikulas Patocka 91648debafeSMikulas Patocka wc->tree = RB_ROOT; 91748debafeSMikulas Patocka INIT_LIST_HEAD(&wc->lru); 91848debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 91948debafeSMikulas Patocka wc->freetree = RB_ROOT; 92048debafeSMikulas Patocka wc->current_free = NULL; 92148debafeSMikulas Patocka } else { 92248debafeSMikulas Patocka INIT_LIST_HEAD(&wc->freelist); 92348debafeSMikulas Patocka } 92448debafeSMikulas Patocka wc->freelist_size = 0; 92548debafeSMikulas Patocka 92648debafeSMikulas Patocka r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); 92748debafeSMikulas Patocka if (r) { 92848debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); 92948debafeSMikulas Patocka sb_seq_count = cpu_to_le64(0); 93048debafeSMikulas Patocka } 
93148debafeSMikulas Patocka wc->seq_count = le64_to_cpu(sb_seq_count); 93248debafeSMikulas Patocka 93348debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 93448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 93548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 93648debafeSMikulas Patocka struct wc_memory_entry wme; 93748debafeSMikulas Patocka if (writecache_has_error(wc)) { 93848debafeSMikulas Patocka e->original_sector = -1; 93948debafeSMikulas Patocka e->seq_count = -1; 94048debafeSMikulas Patocka continue; 94148debafeSMikulas Patocka } 94248debafeSMikulas Patocka r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 94348debafeSMikulas Patocka if (r) { 94448debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", 94548debafeSMikulas Patocka (unsigned long)b, r); 94648debafeSMikulas Patocka e->original_sector = -1; 94748debafeSMikulas Patocka e->seq_count = -1; 94848debafeSMikulas Patocka } else { 94948debafeSMikulas Patocka e->original_sector = le64_to_cpu(wme.original_sector); 95048debafeSMikulas Patocka e->seq_count = le64_to_cpu(wme.seq_count); 95148debafeSMikulas Patocka } 95248debafeSMikulas Patocka } 95348debafeSMikulas Patocka #endif 95448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 95548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 95648debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) { 95748debafeSMikulas Patocka if (read_seq_count(wc, e) != -1) { 95848debafeSMikulas Patocka erase_this: 95948debafeSMikulas Patocka clear_seq_count(wc, e); 96048debafeSMikulas Patocka need_flush = true; 96148debafeSMikulas Patocka } 96248debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 96348debafeSMikulas Patocka } else { 96448debafeSMikulas Patocka struct wc_entry *old; 96548debafeSMikulas Patocka 96648debafeSMikulas Patocka old = writecache_find_entry(wc, read_original_sector(wc, e), 0); 
96748debafeSMikulas Patocka if (!old) { 96848debafeSMikulas Patocka writecache_insert_entry(wc, e); 96948debafeSMikulas Patocka } else { 97048debafeSMikulas Patocka if (read_seq_count(wc, old) == read_seq_count(wc, e)) { 97148debafeSMikulas Patocka writecache_error(wc, -EINVAL, 97248debafeSMikulas Patocka "two identical entries, position %llu, sector %llu, sequence %llu", 97348debafeSMikulas Patocka (unsigned long long)b, (unsigned long long)read_original_sector(wc, e), 97448debafeSMikulas Patocka (unsigned long long)read_seq_count(wc, e)); 97548debafeSMikulas Patocka } 97648debafeSMikulas Patocka if (read_seq_count(wc, old) > read_seq_count(wc, e)) { 97748debafeSMikulas Patocka goto erase_this; 97848debafeSMikulas Patocka } else { 97948debafeSMikulas Patocka writecache_free_entry(wc, old); 98048debafeSMikulas Patocka writecache_insert_entry(wc, e); 98148debafeSMikulas Patocka need_flush = true; 98248debafeSMikulas Patocka } 98348debafeSMikulas Patocka } 98448debafeSMikulas Patocka } 98548debafeSMikulas Patocka cond_resched(); 98648debafeSMikulas Patocka } 98748debafeSMikulas Patocka 98848debafeSMikulas Patocka if (need_flush) { 98948debafeSMikulas Patocka writecache_flush_all_metadata(wc); 990aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 99148debafeSMikulas Patocka } 99248debafeSMikulas Patocka 99341c526c5SMikulas Patocka writecache_verify_watermark(wc); 99441c526c5SMikulas Patocka 995*3923d485SMikulas Patocka if (wc->max_age != MAX_AGE_UNSPECIFIED) 996*3923d485SMikulas Patocka mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 997*3923d485SMikulas Patocka 99848debafeSMikulas Patocka wc_unlock(wc); 99948debafeSMikulas Patocka } 100048debafeSMikulas Patocka 100148debafeSMikulas Patocka static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 100248debafeSMikulas Patocka { 100348debafeSMikulas Patocka if (argc != 1) 100448debafeSMikulas Patocka return -EINVAL; 100548debafeSMikulas Patocka 
100648debafeSMikulas Patocka wc_lock(wc); 100748debafeSMikulas Patocka if (dm_suspended(wc->ti)) { 100848debafeSMikulas Patocka wc_unlock(wc); 100948debafeSMikulas Patocka return -EBUSY; 101048debafeSMikulas Patocka } 101148debafeSMikulas Patocka if (writecache_has_error(wc)) { 101248debafeSMikulas Patocka wc_unlock(wc); 101348debafeSMikulas Patocka return -EIO; 101448debafeSMikulas Patocka } 101548debafeSMikulas Patocka 101648debafeSMikulas Patocka writecache_flush(wc); 101748debafeSMikulas Patocka wc->writeback_all++; 101848debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 101948debafeSMikulas Patocka wc_unlock(wc); 102048debafeSMikulas Patocka 102148debafeSMikulas Patocka flush_workqueue(wc->writeback_wq); 102248debafeSMikulas Patocka 102348debafeSMikulas Patocka wc_lock(wc); 102448debafeSMikulas Patocka wc->writeback_all--; 102548debafeSMikulas Patocka if (writecache_has_error(wc)) { 102648debafeSMikulas Patocka wc_unlock(wc); 102748debafeSMikulas Patocka return -EIO; 102848debafeSMikulas Patocka } 102948debafeSMikulas Patocka wc_unlock(wc); 103048debafeSMikulas Patocka 103148debafeSMikulas Patocka return 0; 103248debafeSMikulas Patocka } 103348debafeSMikulas Patocka 103448debafeSMikulas Patocka static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 103548debafeSMikulas Patocka { 103648debafeSMikulas Patocka if (argc != 1) 103748debafeSMikulas Patocka return -EINVAL; 103848debafeSMikulas Patocka 103948debafeSMikulas Patocka wc_lock(wc); 104048debafeSMikulas Patocka wc->flush_on_suspend = true; 104148debafeSMikulas Patocka wc_unlock(wc); 104248debafeSMikulas Patocka 104348debafeSMikulas Patocka return 0; 104448debafeSMikulas Patocka } 104548debafeSMikulas Patocka 104693de44ebSMikulas Patocka static void activate_cleaner(struct dm_writecache *wc) 104793de44ebSMikulas Patocka { 104893de44ebSMikulas Patocka wc->flush_on_suspend = true; 104993de44ebSMikulas Patocka wc->cleaner = true; 
105093de44ebSMikulas Patocka wc->freelist_high_watermark = wc->n_blocks; 105193de44ebSMikulas Patocka wc->freelist_low_watermark = wc->n_blocks; 105293de44ebSMikulas Patocka } 105393de44ebSMikulas Patocka 105493de44ebSMikulas Patocka static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 105593de44ebSMikulas Patocka { 105693de44ebSMikulas Patocka if (argc != 1) 105793de44ebSMikulas Patocka return -EINVAL; 105893de44ebSMikulas Patocka 105993de44ebSMikulas Patocka wc_lock(wc); 106093de44ebSMikulas Patocka activate_cleaner(wc); 106193de44ebSMikulas Patocka if (!dm_suspended(wc->ti)) 106293de44ebSMikulas Patocka writecache_verify_watermark(wc); 106393de44ebSMikulas Patocka wc_unlock(wc); 106493de44ebSMikulas Patocka 106593de44ebSMikulas Patocka return 0; 106693de44ebSMikulas Patocka } 106793de44ebSMikulas Patocka 106848debafeSMikulas Patocka static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, 106948debafeSMikulas Patocka char *result, unsigned maxlen) 107048debafeSMikulas Patocka { 107148debafeSMikulas Patocka int r = -EINVAL; 107248debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 107348debafeSMikulas Patocka 107448debafeSMikulas Patocka if (!strcasecmp(argv[0], "flush")) 107548debafeSMikulas Patocka r = process_flush_mesg(argc, argv, wc); 107648debafeSMikulas Patocka else if (!strcasecmp(argv[0], "flush_on_suspend")) 107748debafeSMikulas Patocka r = process_flush_on_suspend_mesg(argc, argv, wc); 107893de44ebSMikulas Patocka else if (!strcasecmp(argv[0], "cleaner")) 107993de44ebSMikulas Patocka r = process_cleaner_mesg(argc, argv, wc); 108048debafeSMikulas Patocka else 108148debafeSMikulas Patocka DMERR("unrecognised message received: %s", argv[0]); 108248debafeSMikulas Patocka 108348debafeSMikulas Patocka return r; 108448debafeSMikulas Patocka } 108548debafeSMikulas Patocka 108648debafeSMikulas Patocka static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) 
108748debafeSMikulas Patocka { 108848debafeSMikulas Patocka void *buf; 108948debafeSMikulas Patocka unsigned long flags; 109048debafeSMikulas Patocka unsigned size; 109148debafeSMikulas Patocka int rw = bio_data_dir(bio); 109248debafeSMikulas Patocka unsigned remaining_size = wc->block_size; 109348debafeSMikulas Patocka 109448debafeSMikulas Patocka do { 109548debafeSMikulas Patocka struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); 109648debafeSMikulas Patocka buf = bvec_kmap_irq(&bv, &flags); 109748debafeSMikulas Patocka size = bv.bv_len; 109848debafeSMikulas Patocka if (unlikely(size > remaining_size)) 109948debafeSMikulas Patocka size = remaining_size; 110048debafeSMikulas Patocka 110148debafeSMikulas Patocka if (rw == READ) { 110248debafeSMikulas Patocka int r; 110348debafeSMikulas Patocka r = memcpy_mcsafe(buf, data, size); 110448debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 110548debafeSMikulas Patocka if (unlikely(r)) { 110648debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading data: %d", r); 110748debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 110848debafeSMikulas Patocka } 110948debafeSMikulas Patocka } else { 111048debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 111148debafeSMikulas Patocka memcpy_flushcache(data, buf, size); 111248debafeSMikulas Patocka } 111348debafeSMikulas Patocka 111448debafeSMikulas Patocka bvec_kunmap_irq(buf, &flags); 111548debafeSMikulas Patocka 111648debafeSMikulas Patocka data = (char *)data + size; 111748debafeSMikulas Patocka remaining_size -= size; 111848debafeSMikulas Patocka bio_advance(bio, size); 111948debafeSMikulas Patocka } while (unlikely(remaining_size)); 112048debafeSMikulas Patocka } 112148debafeSMikulas Patocka 112248debafeSMikulas Patocka static int writecache_flush_thread(void *data) 112348debafeSMikulas Patocka { 112448debafeSMikulas Patocka struct dm_writecache *wc = data; 112548debafeSMikulas Patocka 112648debafeSMikulas Patocka while (1) { 
112748debafeSMikulas Patocka struct bio *bio; 112848debafeSMikulas Patocka 112948debafeSMikulas Patocka wc_lock(wc); 113048debafeSMikulas Patocka bio = bio_list_pop(&wc->flush_list); 113148debafeSMikulas Patocka if (!bio) { 113248debafeSMikulas Patocka set_current_state(TASK_INTERRUPTIBLE); 113348debafeSMikulas Patocka wc_unlock(wc); 113448debafeSMikulas Patocka 113548debafeSMikulas Patocka if (unlikely(kthread_should_stop())) { 113648debafeSMikulas Patocka set_current_state(TASK_RUNNING); 113748debafeSMikulas Patocka break; 113848debafeSMikulas Patocka } 113948debafeSMikulas Patocka 114048debafeSMikulas Patocka schedule(); 114148debafeSMikulas Patocka continue; 114248debafeSMikulas Patocka } 114348debafeSMikulas Patocka 114448debafeSMikulas Patocka if (bio_op(bio) == REQ_OP_DISCARD) { 114548debafeSMikulas Patocka writecache_discard(wc, bio->bi_iter.bi_sector, 114648debafeSMikulas Patocka bio_end_sector(bio)); 114748debafeSMikulas Patocka wc_unlock(wc); 114848debafeSMikulas Patocka bio_set_dev(bio, wc->dev->bdev); 114948debafeSMikulas Patocka generic_make_request(bio); 115048debafeSMikulas Patocka } else { 115148debafeSMikulas Patocka writecache_flush(wc); 115248debafeSMikulas Patocka wc_unlock(wc); 115348debafeSMikulas Patocka if (writecache_has_error(wc)) 115448debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 115548debafeSMikulas Patocka bio_endio(bio); 115648debafeSMikulas Patocka } 115748debafeSMikulas Patocka } 115848debafeSMikulas Patocka 115948debafeSMikulas Patocka return 0; 116048debafeSMikulas Patocka } 116148debafeSMikulas Patocka 116248debafeSMikulas Patocka static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) 116348debafeSMikulas Patocka { 116448debafeSMikulas Patocka if (bio_list_empty(&wc->flush_list)) 116548debafeSMikulas Patocka wake_up_process(wc->flush_thread); 116648debafeSMikulas Patocka bio_list_add(&wc->flush_list, bio); 116748debafeSMikulas Patocka } 116848debafeSMikulas Patocka 116948debafeSMikulas Patocka 
static int writecache_map(struct dm_target *ti, struct bio *bio) 117048debafeSMikulas Patocka { 117148debafeSMikulas Patocka struct wc_entry *e; 117248debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 117348debafeSMikulas Patocka 117448debafeSMikulas Patocka bio->bi_private = NULL; 117548debafeSMikulas Patocka 117648debafeSMikulas Patocka wc_lock(wc); 117748debafeSMikulas Patocka 117848debafeSMikulas Patocka if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 117948debafeSMikulas Patocka if (writecache_has_error(wc)) 118048debafeSMikulas Patocka goto unlock_error; 118148debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 118248debafeSMikulas Patocka writecache_flush(wc); 118348debafeSMikulas Patocka if (writecache_has_error(wc)) 118448debafeSMikulas Patocka goto unlock_error; 118548debafeSMikulas Patocka goto unlock_submit; 118648debafeSMikulas Patocka } else { 118748debafeSMikulas Patocka writecache_offload_bio(wc, bio); 118848debafeSMikulas Patocka goto unlock_return; 118948debafeSMikulas Patocka } 119048debafeSMikulas Patocka } 119148debafeSMikulas Patocka 119248debafeSMikulas Patocka bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 119348debafeSMikulas Patocka 119448debafeSMikulas Patocka if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & 119548debafeSMikulas Patocka (wc->block_size / 512 - 1)) != 0)) { 119648debafeSMikulas Patocka DMERR("I/O is not aligned, sector %llu, size %u, block size %u", 119748debafeSMikulas Patocka (unsigned long long)bio->bi_iter.bi_sector, 119848debafeSMikulas Patocka bio->bi_iter.bi_size, wc->block_size); 119948debafeSMikulas Patocka goto unlock_error; 120048debafeSMikulas Patocka } 120148debafeSMikulas Patocka 120248debafeSMikulas Patocka if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { 120348debafeSMikulas Patocka if (writecache_has_error(wc)) 120448debafeSMikulas Patocka goto unlock_error; 120548debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 120648debafeSMikulas Patocka 
writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); 120748debafeSMikulas Patocka goto unlock_remap_origin; 120848debafeSMikulas Patocka } else { 120948debafeSMikulas Patocka writecache_offload_bio(wc, bio); 121048debafeSMikulas Patocka goto unlock_return; 121148debafeSMikulas Patocka } 121248debafeSMikulas Patocka } 121348debafeSMikulas Patocka 121448debafeSMikulas Patocka if (bio_data_dir(bio) == READ) { 121548debafeSMikulas Patocka read_next_block: 121648debafeSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 121748debafeSMikulas Patocka if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { 121848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 121948debafeSMikulas Patocka bio_copy_block(wc, bio, memory_data(wc, e)); 122048debafeSMikulas Patocka if (bio->bi_iter.bi_size) 122148debafeSMikulas Patocka goto read_next_block; 122248debafeSMikulas Patocka goto unlock_submit; 122348debafeSMikulas Patocka } else { 122448debafeSMikulas Patocka dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); 122548debafeSMikulas Patocka bio_set_dev(bio, wc->ssd_dev->bdev); 122648debafeSMikulas Patocka bio->bi_iter.bi_sector = cache_sector(wc, e); 122748debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) 122848debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 122948debafeSMikulas Patocka goto unlock_remap; 123048debafeSMikulas Patocka } 123148debafeSMikulas Patocka } else { 123248debafeSMikulas Patocka if (e) { 123348debafeSMikulas Patocka sector_t next_boundary = 123448debafeSMikulas Patocka read_original_sector(wc, e) - bio->bi_iter.bi_sector; 123548debafeSMikulas Patocka if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { 123648debafeSMikulas Patocka dm_accept_partial_bio(bio, next_boundary); 123748debafeSMikulas Patocka } 123848debafeSMikulas Patocka } 123948debafeSMikulas Patocka goto unlock_remap_origin; 124048debafeSMikulas Patocka } 124148debafeSMikulas Patocka } else { 
124248debafeSMikulas Patocka do { 1243d53f1fafSMikulas Patocka bool found_entry = false; 124448debafeSMikulas Patocka if (writecache_has_error(wc)) 124548debafeSMikulas Patocka goto unlock_error; 124648debafeSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); 124748debafeSMikulas Patocka if (e) { 124848debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) 124948debafeSMikulas Patocka goto bio_copy; 125048debafeSMikulas Patocka if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { 125148debafeSMikulas Patocka wc->overwrote_committed = true; 125248debafeSMikulas Patocka goto bio_copy; 125348debafeSMikulas Patocka } 1254d53f1fafSMikulas Patocka found_entry = true; 125593de44ebSMikulas Patocka } else { 125693de44ebSMikulas Patocka if (unlikely(wc->cleaner)) 125793de44ebSMikulas Patocka goto direct_write; 125848debafeSMikulas Patocka } 1259dcd19507SMikulas Patocka e = writecache_pop_from_freelist(wc, (sector_t)-1); 126048debafeSMikulas Patocka if (unlikely(!e)) { 1261d53f1fafSMikulas Patocka if (!found_entry) { 126293de44ebSMikulas Patocka direct_write: 1263d53f1fafSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 1264d53f1fafSMikulas Patocka if (e) { 1265d53f1fafSMikulas Patocka sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; 1266d53f1fafSMikulas Patocka BUG_ON(!next_boundary); 1267d53f1fafSMikulas Patocka if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { 1268d53f1fafSMikulas Patocka dm_accept_partial_bio(bio, next_boundary); 1269d53f1fafSMikulas Patocka } 1270d53f1fafSMikulas Patocka } 1271d53f1fafSMikulas Patocka goto unlock_remap_origin; 1272d53f1fafSMikulas Patocka } 127348debafeSMikulas Patocka writecache_wait_on_freelist(wc); 127448debafeSMikulas Patocka continue; 127548debafeSMikulas Patocka } 127648debafeSMikulas Patocka write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); 127748debafeSMikulas Patocka 
writecache_insert_entry(wc, e); 127848debafeSMikulas Patocka wc->uncommitted_blocks++; 127948debafeSMikulas Patocka bio_copy: 128048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 128148debafeSMikulas Patocka bio_copy_block(wc, bio, memory_data(wc, e)); 128248debafeSMikulas Patocka } else { 1283dcd19507SMikulas Patocka unsigned bio_size = wc->block_size; 1284dcd19507SMikulas Patocka sector_t start_cache_sec = cache_sector(wc, e); 1285dcd19507SMikulas Patocka sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); 1286dcd19507SMikulas Patocka 1287dcd19507SMikulas Patocka while (bio_size < bio->bi_iter.bi_size) { 1288dcd19507SMikulas Patocka struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); 1289dcd19507SMikulas Patocka if (!f) 1290dcd19507SMikulas Patocka break; 1291dcd19507SMikulas Patocka write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + 1292dcd19507SMikulas Patocka (bio_size >> SECTOR_SHIFT), wc->seq_count); 1293dcd19507SMikulas Patocka writecache_insert_entry(wc, f); 1294dcd19507SMikulas Patocka wc->uncommitted_blocks++; 1295dcd19507SMikulas Patocka bio_size += wc->block_size; 1296dcd19507SMikulas Patocka current_cache_sec += wc->block_size >> SECTOR_SHIFT; 1297dcd19507SMikulas Patocka } 1298dcd19507SMikulas Patocka 129948debafeSMikulas Patocka bio_set_dev(bio, wc->ssd_dev->bdev); 1300dcd19507SMikulas Patocka bio->bi_iter.bi_sector = start_cache_sec; 1301dcd19507SMikulas Patocka dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); 1302dcd19507SMikulas Patocka 130348debafeSMikulas Patocka if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { 130448debafeSMikulas Patocka wc->uncommitted_blocks = 0; 130548debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 130648debafeSMikulas Patocka } else { 130748debafeSMikulas Patocka writecache_schedule_autocommit(wc); 130848debafeSMikulas Patocka } 130948debafeSMikulas Patocka goto unlock_remap; 131048debafeSMikulas Patocka } 
131148debafeSMikulas Patocka } while (bio->bi_iter.bi_size); 131248debafeSMikulas Patocka 1313c1005322SMaged Mokhtar if (unlikely(bio->bi_opf & REQ_FUA || 1314c1005322SMaged Mokhtar wc->uncommitted_blocks >= wc->autocommit_blocks)) 131548debafeSMikulas Patocka writecache_flush(wc); 131648debafeSMikulas Patocka else 131748debafeSMikulas Patocka writecache_schedule_autocommit(wc); 131848debafeSMikulas Patocka goto unlock_submit; 131948debafeSMikulas Patocka } 132048debafeSMikulas Patocka 132148debafeSMikulas Patocka unlock_remap_origin: 132248debafeSMikulas Patocka bio_set_dev(bio, wc->dev->bdev); 132348debafeSMikulas Patocka wc_unlock(wc); 132448debafeSMikulas Patocka return DM_MAPIO_REMAPPED; 132548debafeSMikulas Patocka 132648debafeSMikulas Patocka unlock_remap: 132748debafeSMikulas Patocka /* make sure that writecache_end_io decrements bio_in_progress: */ 132848debafeSMikulas Patocka bio->bi_private = (void *)1; 132948debafeSMikulas Patocka atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); 133048debafeSMikulas Patocka wc_unlock(wc); 133148debafeSMikulas Patocka return DM_MAPIO_REMAPPED; 133248debafeSMikulas Patocka 133348debafeSMikulas Patocka unlock_submit: 133448debafeSMikulas Patocka wc_unlock(wc); 133548debafeSMikulas Patocka bio_endio(bio); 133648debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 133748debafeSMikulas Patocka 133848debafeSMikulas Patocka unlock_return: 133948debafeSMikulas Patocka wc_unlock(wc); 134048debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 134148debafeSMikulas Patocka 134248debafeSMikulas Patocka unlock_error: 134348debafeSMikulas Patocka wc_unlock(wc); 134448debafeSMikulas Patocka bio_io_error(bio); 134548debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 134648debafeSMikulas Patocka } 134748debafeSMikulas Patocka

/*
 * End-io hook of the target.
 *
 * A bio whose bi_private is non-NULL was counted in wc->bio_in_progress[]
 * when it was remapped.  Drop that count and, once it reaches zero, wake
 * any sleeper on the matching bio_in_progress_wait queue.
 *
 * Always returns 0; *status is neither read nor modified.
 */
static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
{
	struct dm_writecache *wc = ti->private;
	int dir;

	/* bios that were never counted have nothing to undo */
	if (bio->bi_private == NULL)
		return 0;

	dir = bio_data_dir(bio);
	if (atomic_dec_and_test(&wc->bio_in_progress[dir]) &&
	    unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir])))
		wake_up(&wc->bio_in_progress_wait[dir]);

	return 0;
}

/*
 * Invoke @fn once for the origin device (wc->dev), with start 0 and
 * length ti->len, and return whatever @fn returns.
 */
static int writecache_iterate_devices(struct dm_target *ti,
				      iterate_devices_callout_fn fn, void *data)
{
	struct dm_writecache *wc = ti->private;

	return fn(ti, wc->dev, 0, ti->len, data);
}

/*
 * Raise the logical block size, the physical block size and the minimum
 * I/O size in @limits to at least the cache block size.  Values that are
 * already large enough are left alone.
 */
static void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dm_writecache *wc = ti->private;
	unsigned bs = wc->block_size;

	if (limits->logical_block_size < bs)
		limits->logical_block_size = bs;

	if (limits->physical_block_size < bs)
		limits->physical_block_size = bs;

	if (limits->io_min < bs)
		limits->io_min = bs;
}


/*
 * Completion callback of a writeback bio.
 *
 * Appends the writeback_struct that embeds @bio to wc->endio_list.  The
 * endio thread is woken only when the list was empty beforehand; the wake-up
 * and the insertion both happen under endio_list_lock.
 */
static void writecache_writeback_endio(struct bio *bio)
{
	struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio);
	struct dm_writecache *wc = wb->wc;
	unsigned long irq_flags;

	raw_spin_lock_irqsave(&wc->endio_list_lock, irq_flags);
	if (unlikely(list_empty(&wc->endio_list)))
		wake_up_process(wc->endio_thread);
	list_add_tail(&wb->endio_entry, &wc->endio_list);
	raw_spin_unlock_irqrestore(&wc->endio_list_lock, irq_flags);
}

/*
 * Completion callback of a kcopyd copy job.
 *
 * @ptr is the copy_struct describing the job.  Its error field is set to
 * -EIO when either @read_err or @write_err is non-zero and to 0 otherwise;
 * the job is then appended to wc->endio_list, waking the endio thread if
 * the list was empty.
 */
static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
{
	struct copy_struct *c = ptr;
	struct dm_writecache *wc = c->wc;

	if (unlikely(read_err || write_err))
		c->error = -EIO;
	else
		c->error = 0;

	raw_spin_lock_irq(&wc->endio_list_lock);
	if (unlikely(list_empty(&wc->endio_list)))
		wake_up_process(wc->endio_thread);
	list_add_tail(&c->endio_entry, &wc->endio_list);
	raw_spin_unlock_irq(&wc->endio_list_lock);
}

/*
 * Retire every finished writeback_struct on @list (pmem mode).
 *
 * @list must not be empty.  The only caller in view,
 * writecache_endio_thread(), takes the writecache lock before calling; the
 * lock is briefly dropped and re-taken after every ENDIO_LATENCY processed
 * entries, right after committing what has been flushed so far.
 */
static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
{
	struct writeback_struct *wb;
	struct wc_entry *e;
	unsigned long n_done = 0;
	unsigned idx;

	do {
		wb = list_entry(list->next, struct writeback_struct, endio_entry);
		list_del(&wb->endio_entry);

		if (unlikely(wb->bio.bi_status != BLK_STS_OK))
			writecache_error(wc, blk_status_to_errno(wb->bio.bi_status),
					"write error %d", wb->bio.bi_status);

		idx = 0;
		do {
			e = wb->wc_list[idx];
			BUG_ON(!e->write_in_progress);
			e->write_in_progress = false;
			INIT_LIST_HEAD(&e->lru);
			/* entries are only released while no error is recorded */
			if (!writecache_has_error(wc))
				writecache_free_entry(wc, e);
			BUG_ON(!wc->writeback_size);
			wc->writeback_size--;

			n_done++;
			if (unlikely(n_done >= ENDIO_LATENCY)) {
				writecache_commit_flushed(wc, false);
				/* give other lock waiters a chance to run */
				wc_unlock(wc);
				wc_lock(wc);
				n_done = 0;
			}
		} while (++idx < wb->wc_list_n);

		/* the entry array is heap-allocated only when it is not the inline one */
		if (wb->wc_list != wb->wc_list_inline)
			kfree(wb->wc_list);
		bio_put(&wb->bio);
	} while (!list_empty(list));
}

/*
 * Retire every finished copy_struct on @list (ssd mode).
 *
 * @list must not be empty.  The c->n_entries entries of one job are walked
 * as consecutive array elements starting at c->e; n_entries is counted down
 * to zero in the process.  The copy_struct goes back to wc->copy_pool.
 */
static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list)
{
	struct copy_struct *c;
	struct wc_entry *e;

	do {
		c = list_entry(list->next, struct copy_struct, endio_entry);
		list_del(&c->endio_entry);

		if (unlikely(c->error))
			writecache_error(wc, c->error, "copy error");

		e = c->e;
		do {
			BUG_ON(!e->write_in_progress);
			e->write_in_progress = false;
			INIT_LIST_HEAD(&e->lru);
			/* entries are only released while no error is recorded */
			if (!writecache_has_error(wc))
				writecache_free_entry(wc, e);

			BUG_ON(!wc->writeback_size);
			wc->writeback_size--;
			e++;
		} while (--c->n_entries);

		mempool_free(c, &wc->copy_pool);
	} while (!list_empty(list));
}

/*
 * Body of the endio kernel thread.
 *
 * Sleeps until completions appear on wc->endio_list, moves the whole list
 * to a private list head, and retires it with the writecache lock held.
 * Runs until kthread_should_stop() reports true while the list is empty.
 * Returns 0.
 */
static int writecache_endio_thread(void *data)
{
	struct dm_writecache *wc = data;

	for (;;) {
		struct list_head list;

		raw_spin_lock_irq(&wc->endio_list_lock);
		if (list_empty(&wc->endio_list)) {
			/*
			 * Mark ourselves sleeping before the lock is dropped;
			 * producers issue their wake-up under the same lock.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			raw_spin_unlock_irq(&wc->endio_list_lock);

			if (unlikely(kthread_should_stop())) {
				set_current_state(TASK_RUNNING);
				break;
			}

			schedule();

			continue;
		}

		/* steal the whole list: copy the head, then re-point both ends at it */
		list = wc->endio_list;
		list.prev->next = &list;
		list.next->prev = &list;
		INIT_LIST_HEAD(&wc->endio_list);
		raw_spin_unlock_irq(&wc->endio_list_lock);

		if (!WC_MODE_FUA(wc))
			writecache_disk_flush(wc, wc->dev);

		wc_lock(wc);

		if (WC_MODE_PMEM(wc)) {
			__writecache_endio_pmem(wc, &list);
		} else {
			__writecache_endio_ssd(wc, &list);
			writecache_wait_for_ios(wc, READ);
		}

		writecache_commit_flushed(wc, false);

		wc_unlock(wc);
	}

	return 0;
}

/*
 * Flush the cached data of entry @e and append its page to the bio of @wb.
 *
 * Returns true when bio_add_page() returned non-zero, false when it
 * returned 0.  @gfp is accepted but not used by this function.
 */
static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp)
{
	struct dm_writecache *wc = wb->wc;
	unsigned block_size = wc->block_size;
	void *address = memory_data(wc, e);
	int added;

	persistent_memory_flush_cache(address, block_size);
	added = bio_add_page(&wb->bio, persistent_memory_page(address),
			     block_size, persistent_memory_page_offset(address));
	return added != 0;
}

/* A list of cache entries selected for writeback. */
struct writeback_list {
	struct list_head list;	/* entries, linked through wc_entry.lru */
	size_t size;		/* number of entries still on the list */
};
154148debafeSMikulas Patocka 154248debafeSMikulas Patocka static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl) 154348debafeSMikulas Patocka { 154448debafeSMikulas Patocka if (unlikely(wc->max_writeback_jobs)) { 154548debafeSMikulas Patocka if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) { 154648debafeSMikulas Patocka wc_lock(wc); 154748debafeSMikulas Patocka while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs) 154848debafeSMikulas Patocka writecache_wait_on_freelist(wc); 154948debafeSMikulas Patocka wc_unlock(wc); 155048debafeSMikulas Patocka } 155148debafeSMikulas Patocka } 155248debafeSMikulas Patocka cond_resched(); 155348debafeSMikulas Patocka } 155448debafeSMikulas Patocka 155548debafeSMikulas Patocka static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl) 155648debafeSMikulas Patocka { 155748debafeSMikulas Patocka struct wc_entry *e, *f; 155848debafeSMikulas Patocka struct bio *bio; 155948debafeSMikulas Patocka struct writeback_struct *wb; 156048debafeSMikulas Patocka unsigned max_pages; 156148debafeSMikulas Patocka 156248debafeSMikulas Patocka while (wbl->size) { 156348debafeSMikulas Patocka wbl->size--; 156448debafeSMikulas Patocka e = container_of(wbl->list.prev, struct wc_entry, lru); 156548debafeSMikulas Patocka list_del(&e->lru); 156648debafeSMikulas Patocka 156748debafeSMikulas Patocka max_pages = e->wc_list_contiguous; 156848debafeSMikulas Patocka 156948debafeSMikulas Patocka bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set); 157048debafeSMikulas Patocka wb = container_of(bio, struct writeback_struct, bio); 157148debafeSMikulas Patocka wb->wc = wc; 157209f2d656SHuaisheng Ye bio->bi_end_io = writecache_writeback_endio; 157309f2d656SHuaisheng Ye bio_set_dev(bio, wc->dev->bdev); 157409f2d656SHuaisheng Ye bio->bi_iter.bi_sector = read_original_sector(wc, e); 157548debafeSMikulas Patocka if (max_pages <= WB_LIST_INLINE || 157650a7d3baSKees 
Cook unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), 157748debafeSMikulas Patocka GFP_NOIO | __GFP_NORETRY | 157848debafeSMikulas Patocka __GFP_NOMEMALLOC | __GFP_NOWARN)))) { 157948debafeSMikulas Patocka wb->wc_list = wb->wc_list_inline; 158048debafeSMikulas Patocka max_pages = WB_LIST_INLINE; 158148debafeSMikulas Patocka } 158248debafeSMikulas Patocka 158348debafeSMikulas Patocka BUG_ON(!wc_add_block(wb, e, GFP_NOIO)); 158448debafeSMikulas Patocka 158548debafeSMikulas Patocka wb->wc_list[0] = e; 158648debafeSMikulas Patocka wb->wc_list_n = 1; 158748debafeSMikulas Patocka 158848debafeSMikulas Patocka while (wbl->size && wb->wc_list_n < max_pages) { 158948debafeSMikulas Patocka f = container_of(wbl->list.prev, struct wc_entry, lru); 159048debafeSMikulas Patocka if (read_original_sector(wc, f) != 159148debafeSMikulas Patocka read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) 159248debafeSMikulas Patocka break; 159348debafeSMikulas Patocka if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN)) 159448debafeSMikulas Patocka break; 159548debafeSMikulas Patocka wbl->size--; 159648debafeSMikulas Patocka list_del(&f->lru); 159748debafeSMikulas Patocka wb->wc_list[wb->wc_list_n++] = f; 159848debafeSMikulas Patocka e = f; 159948debafeSMikulas Patocka } 160009f2d656SHuaisheng Ye bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA); 160148debafeSMikulas Patocka if (writecache_has_error(wc)) { 160248debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 160309f2d656SHuaisheng Ye bio_endio(bio); 160448debafeSMikulas Patocka } else { 160509f2d656SHuaisheng Ye submit_bio(bio); 160648debafeSMikulas Patocka } 160748debafeSMikulas Patocka 160848debafeSMikulas Patocka __writeback_throttle(wc, wbl); 160948debafeSMikulas Patocka } 161048debafeSMikulas Patocka } 161148debafeSMikulas Patocka 161248debafeSMikulas Patocka static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl) 161348debafeSMikulas Patocka 
{ 161448debafeSMikulas Patocka struct wc_entry *e, *f; 161548debafeSMikulas Patocka struct dm_io_region from, to; 161648debafeSMikulas Patocka struct copy_struct *c; 161748debafeSMikulas Patocka 161848debafeSMikulas Patocka while (wbl->size) { 161948debafeSMikulas Patocka unsigned n_sectors; 162048debafeSMikulas Patocka 162148debafeSMikulas Patocka wbl->size--; 162248debafeSMikulas Patocka e = container_of(wbl->list.prev, struct wc_entry, lru); 162348debafeSMikulas Patocka list_del(&e->lru); 162448debafeSMikulas Patocka 162548debafeSMikulas Patocka n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT); 162648debafeSMikulas Patocka 162748debafeSMikulas Patocka from.bdev = wc->ssd_dev->bdev; 162848debafeSMikulas Patocka from.sector = cache_sector(wc, e); 162948debafeSMikulas Patocka from.count = n_sectors; 163048debafeSMikulas Patocka to.bdev = wc->dev->bdev; 163148debafeSMikulas Patocka to.sector = read_original_sector(wc, e); 163248debafeSMikulas Patocka to.count = n_sectors; 163348debafeSMikulas Patocka 163448debafeSMikulas Patocka c = mempool_alloc(&wc->copy_pool, GFP_NOIO); 163548debafeSMikulas Patocka c->wc = wc; 163648debafeSMikulas Patocka c->e = e; 163748debafeSMikulas Patocka c->n_entries = e->wc_list_contiguous; 163848debafeSMikulas Patocka 163948debafeSMikulas Patocka while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) { 164048debafeSMikulas Patocka wbl->size--; 164148debafeSMikulas Patocka f = container_of(wbl->list.prev, struct wc_entry, lru); 164248debafeSMikulas Patocka BUG_ON(f != e + 1); 164348debafeSMikulas Patocka list_del(&f->lru); 164448debafeSMikulas Patocka e = f; 164548debafeSMikulas Patocka } 164648debafeSMikulas Patocka 164748debafeSMikulas Patocka dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c); 164848debafeSMikulas Patocka 164948debafeSMikulas Patocka __writeback_throttle(wc, wbl); 165048debafeSMikulas Patocka } 165148debafeSMikulas Patocka } 165248debafeSMikulas Patocka 165348debafeSMikulas 
Patocka static void writecache_writeback(struct work_struct *work) 165448debafeSMikulas Patocka { 165548debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work); 165648debafeSMikulas Patocka struct blk_plug plug; 16578dd85873SMikulas Patocka struct wc_entry *f, *uninitialized_var(g), *e = NULL; 165848debafeSMikulas Patocka struct rb_node *node, *next_node; 165948debafeSMikulas Patocka struct list_head skipped; 166048debafeSMikulas Patocka struct writeback_list wbl; 166148debafeSMikulas Patocka unsigned long n_walked; 166248debafeSMikulas Patocka 166348debafeSMikulas Patocka wc_lock(wc); 166448debafeSMikulas Patocka restart: 166548debafeSMikulas Patocka if (writecache_has_error(wc)) { 166648debafeSMikulas Patocka wc_unlock(wc); 166748debafeSMikulas Patocka return; 166848debafeSMikulas Patocka } 166948debafeSMikulas Patocka 167048debafeSMikulas Patocka if (unlikely(wc->writeback_all)) { 167148debafeSMikulas Patocka if (writecache_wait_for_writeback(wc)) 167248debafeSMikulas Patocka goto restart; 167348debafeSMikulas Patocka } 167448debafeSMikulas Patocka 167548debafeSMikulas Patocka if (wc->overwrote_committed) { 167648debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 167748debafeSMikulas Patocka } 167848debafeSMikulas Patocka 167948debafeSMikulas Patocka n_walked = 0; 168048debafeSMikulas Patocka INIT_LIST_HEAD(&skipped); 168148debafeSMikulas Patocka INIT_LIST_HEAD(&wbl.list); 168248debafeSMikulas Patocka wbl.size = 0; 168348debafeSMikulas Patocka while (!list_empty(&wc->lru) && 168448debafeSMikulas Patocka (wc->writeback_all || 1685*3923d485SMikulas Patocka wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark || 1686*3923d485SMikulas Patocka (jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >= 1687*3923d485SMikulas Patocka wc->max_age - wc->max_age / MAX_AGE_DIV))) { 168848debafeSMikulas Patocka 168948debafeSMikulas Patocka n_walked++; 169048debafeSMikulas Patocka if 
(unlikely(n_walked > WRITEBACK_LATENCY) && 169148debafeSMikulas Patocka likely(!wc->writeback_all) && likely(!dm_suspended(wc->ti))) { 169248debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 169348debafeSMikulas Patocka break; 169448debafeSMikulas Patocka } 169548debafeSMikulas Patocka 16965229b489SHuaisheng Ye if (unlikely(wc->writeback_all)) { 16975229b489SHuaisheng Ye if (unlikely(!e)) { 16985229b489SHuaisheng Ye writecache_flush(wc); 16995229b489SHuaisheng Ye e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node); 17005229b489SHuaisheng Ye } else 17015229b489SHuaisheng Ye e = g; 17025229b489SHuaisheng Ye } else 170348debafeSMikulas Patocka e = container_of(wc->lru.prev, struct wc_entry, lru); 170448debafeSMikulas Patocka BUG_ON(e->write_in_progress); 170548debafeSMikulas Patocka if (unlikely(!writecache_entry_is_committed(wc, e))) { 170648debafeSMikulas Patocka writecache_flush(wc); 170748debafeSMikulas Patocka } 170848debafeSMikulas Patocka node = rb_prev(&e->rb_node); 170948debafeSMikulas Patocka if (node) { 171048debafeSMikulas Patocka f = container_of(node, struct wc_entry, rb_node); 171148debafeSMikulas Patocka if (unlikely(read_original_sector(wc, f) == 171248debafeSMikulas Patocka read_original_sector(wc, e))) { 171348debafeSMikulas Patocka BUG_ON(!f->write_in_progress); 171448debafeSMikulas Patocka list_del(&e->lru); 171548debafeSMikulas Patocka list_add(&e->lru, &skipped); 171648debafeSMikulas Patocka cond_resched(); 171748debafeSMikulas Patocka continue; 171848debafeSMikulas Patocka } 171948debafeSMikulas Patocka } 172048debafeSMikulas Patocka wc->writeback_size++; 172148debafeSMikulas Patocka list_del(&e->lru); 172248debafeSMikulas Patocka list_add(&e->lru, &wbl.list); 172348debafeSMikulas Patocka wbl.size++; 172448debafeSMikulas Patocka e->write_in_progress = true; 172548debafeSMikulas Patocka e->wc_list_contiguous = 1; 172648debafeSMikulas Patocka 172748debafeSMikulas Patocka f = e; 172848debafeSMikulas Patocka 
172948debafeSMikulas Patocka while (1) { 173048debafeSMikulas Patocka next_node = rb_next(&f->rb_node); 173148debafeSMikulas Patocka if (unlikely(!next_node)) 173248debafeSMikulas Patocka break; 173348debafeSMikulas Patocka g = container_of(next_node, struct wc_entry, rb_node); 173462421b38SHuaisheng Ye if (unlikely(read_original_sector(wc, g) == 173562421b38SHuaisheng Ye read_original_sector(wc, f))) { 173648debafeSMikulas Patocka f = g; 173748debafeSMikulas Patocka continue; 173848debafeSMikulas Patocka } 173948debafeSMikulas Patocka if (read_original_sector(wc, g) != 174048debafeSMikulas Patocka read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT)) 174148debafeSMikulas Patocka break; 174248debafeSMikulas Patocka if (unlikely(g->write_in_progress)) 174348debafeSMikulas Patocka break; 174448debafeSMikulas Patocka if (unlikely(!writecache_entry_is_committed(wc, g))) 174548debafeSMikulas Patocka break; 174648debafeSMikulas Patocka 174748debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) { 174848debafeSMikulas Patocka if (g != f + 1) 174948debafeSMikulas Patocka break; 175048debafeSMikulas Patocka } 175148debafeSMikulas Patocka 175248debafeSMikulas Patocka n_walked++; 175348debafeSMikulas Patocka //if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all)) 175448debafeSMikulas Patocka // break; 175548debafeSMikulas Patocka 175648debafeSMikulas Patocka wc->writeback_size++; 175748debafeSMikulas Patocka list_del(&g->lru); 175848debafeSMikulas Patocka list_add(&g->lru, &wbl.list); 175948debafeSMikulas Patocka wbl.size++; 176048debafeSMikulas Patocka g->write_in_progress = true; 176148debafeSMikulas Patocka g->wc_list_contiguous = BIO_MAX_PAGES; 176248debafeSMikulas Patocka f = g; 176348debafeSMikulas Patocka e->wc_list_contiguous++; 17645229b489SHuaisheng Ye if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) { 17655229b489SHuaisheng Ye if (unlikely(wc->writeback_all)) { 17665229b489SHuaisheng Ye next_node = rb_next(&f->rb_node); 
17675229b489SHuaisheng Ye if (likely(next_node)) 17685229b489SHuaisheng Ye g = container_of(next_node, struct wc_entry, rb_node); 17695229b489SHuaisheng Ye } 177048debafeSMikulas Patocka break; 177148debafeSMikulas Patocka } 17725229b489SHuaisheng Ye } 177348debafeSMikulas Patocka cond_resched(); 177448debafeSMikulas Patocka } 177548debafeSMikulas Patocka 177648debafeSMikulas Patocka if (!list_empty(&skipped)) { 177748debafeSMikulas Patocka list_splice_tail(&skipped, &wc->lru); 177848debafeSMikulas Patocka /* 177948debafeSMikulas Patocka * If we didn't do any progress, we must wait until some 178048debafeSMikulas Patocka * writeback finishes to avoid burning CPU in a loop 178148debafeSMikulas Patocka */ 178248debafeSMikulas Patocka if (unlikely(!wbl.size)) 178348debafeSMikulas Patocka writecache_wait_for_writeback(wc); 178448debafeSMikulas Patocka } 178548debafeSMikulas Patocka 178648debafeSMikulas Patocka wc_unlock(wc); 178748debafeSMikulas Patocka 178848debafeSMikulas Patocka blk_start_plug(&plug); 178948debafeSMikulas Patocka 179048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 179148debafeSMikulas Patocka __writecache_writeback_pmem(wc, &wbl); 179248debafeSMikulas Patocka else 179348debafeSMikulas Patocka __writecache_writeback_ssd(wc, &wbl); 179448debafeSMikulas Patocka 179548debafeSMikulas Patocka blk_finish_plug(&plug); 179648debafeSMikulas Patocka 179748debafeSMikulas Patocka if (unlikely(wc->writeback_all)) { 179848debafeSMikulas Patocka wc_lock(wc); 179948debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 180048debafeSMikulas Patocka wc_unlock(wc); 180148debafeSMikulas Patocka } 180248debafeSMikulas Patocka } 180348debafeSMikulas Patocka 180448debafeSMikulas Patocka static int calculate_memory_size(uint64_t device_size, unsigned block_size, 180548debafeSMikulas Patocka size_t *n_blocks_p, size_t *n_metadata_blocks_p) 180648debafeSMikulas Patocka { 180748debafeSMikulas Patocka uint64_t n_blocks, offset; 180848debafeSMikulas Patocka struct 
wc_entry e; 180948debafeSMikulas Patocka 181048debafeSMikulas Patocka n_blocks = device_size; 181148debafeSMikulas Patocka do_div(n_blocks, block_size + sizeof(struct wc_memory_entry)); 181248debafeSMikulas Patocka 181348debafeSMikulas Patocka while (1) { 181448debafeSMikulas Patocka if (!n_blocks) 181548debafeSMikulas Patocka return -ENOSPC; 181648debafeSMikulas Patocka /* Verify the following entries[n_blocks] won't overflow */ 181748debafeSMikulas Patocka if (n_blocks >= ((size_t)-sizeof(struct wc_memory_superblock) / 181848debafeSMikulas Patocka sizeof(struct wc_memory_entry))) 181948debafeSMikulas Patocka return -EFBIG; 182048debafeSMikulas Patocka offset = offsetof(struct wc_memory_superblock, entries[n_blocks]); 182148debafeSMikulas Patocka offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1); 182248debafeSMikulas Patocka if (offset + n_blocks * block_size <= device_size) 182348debafeSMikulas Patocka break; 182448debafeSMikulas Patocka n_blocks--; 182548debafeSMikulas Patocka } 182648debafeSMikulas Patocka 182748debafeSMikulas Patocka /* check if the bit field overflows */ 182848debafeSMikulas Patocka e.index = n_blocks; 182948debafeSMikulas Patocka if (e.index != n_blocks) 183048debafeSMikulas Patocka return -EFBIG; 183148debafeSMikulas Patocka 183248debafeSMikulas Patocka if (n_blocks_p) 183348debafeSMikulas Patocka *n_blocks_p = n_blocks; 183448debafeSMikulas Patocka if (n_metadata_blocks_p) 183548debafeSMikulas Patocka *n_metadata_blocks_p = offset >> __ffs(block_size); 183648debafeSMikulas Patocka return 0; 183748debafeSMikulas Patocka } 183848debafeSMikulas Patocka 183948debafeSMikulas Patocka static int init_memory(struct dm_writecache *wc) 184048debafeSMikulas Patocka { 184148debafeSMikulas Patocka size_t b; 184248debafeSMikulas Patocka int r; 184348debafeSMikulas Patocka 184448debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL); 184548debafeSMikulas Patocka if (r) 
184648debafeSMikulas Patocka return r; 184748debafeSMikulas Patocka 184848debafeSMikulas Patocka r = writecache_alloc_entries(wc); 184948debafeSMikulas Patocka if (r) 185048debafeSMikulas Patocka return r; 185148debafeSMikulas Patocka 185248debafeSMikulas Patocka for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++) 185348debafeSMikulas Patocka pmem_assign(sb(wc)->padding[b], cpu_to_le64(0)); 185448debafeSMikulas Patocka pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION)); 185548debafeSMikulas Patocka pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size)); 185648debafeSMikulas Patocka pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks)); 185748debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(0)); 185848debafeSMikulas Patocka 185948debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) 186048debafeSMikulas Patocka write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); 186148debafeSMikulas Patocka 186248debafeSMikulas Patocka writecache_flush_all_metadata(wc); 1863aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 186448debafeSMikulas Patocka pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); 186548debafeSMikulas Patocka writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); 1866aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 186748debafeSMikulas Patocka 186848debafeSMikulas Patocka return 0; 186948debafeSMikulas Patocka } 187048debafeSMikulas Patocka 187148debafeSMikulas Patocka static void writecache_dtr(struct dm_target *ti) 187248debafeSMikulas Patocka { 187348debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 187448debafeSMikulas Patocka 187548debafeSMikulas Patocka if (!wc) 187648debafeSMikulas Patocka return; 187748debafeSMikulas Patocka 187848debafeSMikulas Patocka if (wc->endio_thread) 187948debafeSMikulas Patocka kthread_stop(wc->endio_thread); 188048debafeSMikulas Patocka 188148debafeSMikulas Patocka if (wc->flush_thread) 188248debafeSMikulas 
Patocka kthread_stop(wc->flush_thread); 188348debafeSMikulas Patocka 188448debafeSMikulas Patocka bioset_exit(&wc->bio_set); 188548debafeSMikulas Patocka 188648debafeSMikulas Patocka mempool_exit(&wc->copy_pool); 188748debafeSMikulas Patocka 188848debafeSMikulas Patocka if (wc->writeback_wq) 188948debafeSMikulas Patocka destroy_workqueue(wc->writeback_wq); 189048debafeSMikulas Patocka 189148debafeSMikulas Patocka if (wc->dev) 189248debafeSMikulas Patocka dm_put_device(ti, wc->dev); 189348debafeSMikulas Patocka 189448debafeSMikulas Patocka if (wc->ssd_dev) 189548debafeSMikulas Patocka dm_put_device(ti, wc->ssd_dev); 189648debafeSMikulas Patocka 189748debafeSMikulas Patocka if (wc->entries) 189848debafeSMikulas Patocka vfree(wc->entries); 189948debafeSMikulas Patocka 190048debafeSMikulas Patocka if (wc->memory_map) { 190148debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 190248debafeSMikulas Patocka persistent_memory_release(wc); 190348debafeSMikulas Patocka else 190448debafeSMikulas Patocka vfree(wc->memory_map); 190548debafeSMikulas Patocka } 190648debafeSMikulas Patocka 190748debafeSMikulas Patocka if (wc->dm_kcopyd) 190848debafeSMikulas Patocka dm_kcopyd_client_destroy(wc->dm_kcopyd); 190948debafeSMikulas Patocka 191048debafeSMikulas Patocka if (wc->dm_io) 191148debafeSMikulas Patocka dm_io_client_destroy(wc->dm_io); 191248debafeSMikulas Patocka 191348debafeSMikulas Patocka if (wc->dirty_bitmap) 191448debafeSMikulas Patocka vfree(wc->dirty_bitmap); 191548debafeSMikulas Patocka 191648debafeSMikulas Patocka kfree(wc); 191748debafeSMikulas Patocka } 191848debafeSMikulas Patocka 191948debafeSMikulas Patocka static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) 192048debafeSMikulas Patocka { 192148debafeSMikulas Patocka struct dm_writecache *wc; 192248debafeSMikulas Patocka struct dm_arg_set as; 192348debafeSMikulas Patocka const char *string; 192448debafeSMikulas Patocka unsigned opt_params; 192548debafeSMikulas Patocka size_t offset, data_size; 
192648debafeSMikulas Patocka int i, r; 192748debafeSMikulas Patocka char dummy; 192848debafeSMikulas Patocka int high_wm_percent = HIGH_WATERMARK; 192948debafeSMikulas Patocka int low_wm_percent = LOW_WATERMARK; 193048debafeSMikulas Patocka uint64_t x; 193148debafeSMikulas Patocka struct wc_memory_superblock s; 193248debafeSMikulas Patocka 193348debafeSMikulas Patocka static struct dm_arg _args[] = { 193448debafeSMikulas Patocka {0, 10, "Invalid number of feature args"}, 193548debafeSMikulas Patocka }; 193648debafeSMikulas Patocka 193748debafeSMikulas Patocka as.argc = argc; 193848debafeSMikulas Patocka as.argv = argv; 193948debafeSMikulas Patocka 194048debafeSMikulas Patocka wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL); 194148debafeSMikulas Patocka if (!wc) { 194248debafeSMikulas Patocka ti->error = "Cannot allocate writecache structure"; 194348debafeSMikulas Patocka r = -ENOMEM; 194448debafeSMikulas Patocka goto bad; 194548debafeSMikulas Patocka } 194648debafeSMikulas Patocka ti->private = wc; 194748debafeSMikulas Patocka wc->ti = ti; 194848debafeSMikulas Patocka 194948debafeSMikulas Patocka mutex_init(&wc->lock); 1950*3923d485SMikulas Patocka wc->max_age = MAX_AGE_UNSPECIFIED; 195148debafeSMikulas Patocka writecache_poison_lists(wc); 195248debafeSMikulas Patocka init_waitqueue_head(&wc->freelist_wait); 195348debafeSMikulas Patocka timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0); 1954*3923d485SMikulas Patocka timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0); 195548debafeSMikulas Patocka 195648debafeSMikulas Patocka for (i = 0; i < 2; i++) { 195748debafeSMikulas Patocka atomic_set(&wc->bio_in_progress[i], 0); 195848debafeSMikulas Patocka init_waitqueue_head(&wc->bio_in_progress_wait[i]); 195948debafeSMikulas Patocka } 196048debafeSMikulas Patocka 196148debafeSMikulas Patocka wc->dm_io = dm_io_client_create(); 196248debafeSMikulas Patocka if (IS_ERR(wc->dm_io)) { 196348debafeSMikulas Patocka r = PTR_ERR(wc->dm_io); 
196448debafeSMikulas Patocka ti->error = "Unable to allocate dm-io client"; 196548debafeSMikulas Patocka wc->dm_io = NULL; 196648debafeSMikulas Patocka goto bad; 196748debafeSMikulas Patocka } 196848debafeSMikulas Patocka 1969f87e033bSHuaisheng Ye wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); 197048debafeSMikulas Patocka if (!wc->writeback_wq) { 197148debafeSMikulas Patocka r = -ENOMEM; 197248debafeSMikulas Patocka ti->error = "Could not allocate writeback workqueue"; 197348debafeSMikulas Patocka goto bad; 197448debafeSMikulas Patocka } 197548debafeSMikulas Patocka INIT_WORK(&wc->writeback_work, writecache_writeback); 197648debafeSMikulas Patocka INIT_WORK(&wc->flush_work, writecache_flush_work); 197748debafeSMikulas Patocka 197848debafeSMikulas Patocka raw_spin_lock_init(&wc->endio_list_lock); 197948debafeSMikulas Patocka INIT_LIST_HEAD(&wc->endio_list); 198048debafeSMikulas Patocka wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio"); 198148debafeSMikulas Patocka if (IS_ERR(wc->endio_thread)) { 198248debafeSMikulas Patocka r = PTR_ERR(wc->endio_thread); 198348debafeSMikulas Patocka wc->endio_thread = NULL; 198448debafeSMikulas Patocka ti->error = "Couldn't spawn endio thread"; 198548debafeSMikulas Patocka goto bad; 198648debafeSMikulas Patocka } 198748debafeSMikulas Patocka wake_up_process(wc->endio_thread); 198848debafeSMikulas Patocka 198948debafeSMikulas Patocka /* 199048debafeSMikulas Patocka * Parse the mode (pmem or ssd) 199148debafeSMikulas Patocka */ 199248debafeSMikulas Patocka string = dm_shift_arg(&as); 199348debafeSMikulas Patocka if (!string) 199448debafeSMikulas Patocka goto bad_arguments; 199548debafeSMikulas Patocka 199648debafeSMikulas Patocka if (!strcasecmp(string, "s")) { 199748debafeSMikulas Patocka wc->pmem_mode = false; 199848debafeSMikulas Patocka } else if (!strcasecmp(string, "p")) { 199948debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 200048debafeSMikulas Patocka 
wc->pmem_mode = true; 200148debafeSMikulas Patocka wc->writeback_fua = true; 200248debafeSMikulas Patocka #else 200348debafeSMikulas Patocka /* 200448debafeSMikulas Patocka * If the architecture doesn't support persistent memory or 200548debafeSMikulas Patocka * the kernel doesn't support any DAX drivers, this driver can 200648debafeSMikulas Patocka * only be used in SSD-only mode. 200748debafeSMikulas Patocka */ 200848debafeSMikulas Patocka r = -EOPNOTSUPP; 200948debafeSMikulas Patocka ti->error = "Persistent memory or DAX not supported on this system"; 201048debafeSMikulas Patocka goto bad; 201148debafeSMikulas Patocka #endif 201248debafeSMikulas Patocka } else { 201348debafeSMikulas Patocka goto bad_arguments; 201448debafeSMikulas Patocka } 201548debafeSMikulas Patocka 201648debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 201748debafeSMikulas Patocka r = bioset_init(&wc->bio_set, BIO_POOL_SIZE, 201848debafeSMikulas Patocka offsetof(struct writeback_struct, bio), 201948debafeSMikulas Patocka BIOSET_NEED_BVECS); 202048debafeSMikulas Patocka if (r) { 202148debafeSMikulas Patocka ti->error = "Could not allocate bio set"; 202248debafeSMikulas Patocka goto bad; 202348debafeSMikulas Patocka } 202448debafeSMikulas Patocka } else { 202548debafeSMikulas Patocka r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct)); 202648debafeSMikulas Patocka if (r) { 202748debafeSMikulas Patocka ti->error = "Could not allocate mempool"; 202848debafeSMikulas Patocka goto bad; 202948debafeSMikulas Patocka } 203048debafeSMikulas Patocka } 203148debafeSMikulas Patocka 203248debafeSMikulas Patocka /* 203348debafeSMikulas Patocka * Parse the origin data device 203448debafeSMikulas Patocka */ 203548debafeSMikulas Patocka string = dm_shift_arg(&as); 203648debafeSMikulas Patocka if (!string) 203748debafeSMikulas Patocka goto bad_arguments; 203848debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev); 203948debafeSMikulas Patocka if (r) 
{ 204048debafeSMikulas Patocka ti->error = "Origin data device lookup failed"; 204148debafeSMikulas Patocka goto bad; 204248debafeSMikulas Patocka } 204348debafeSMikulas Patocka 204448debafeSMikulas Patocka /* 204548debafeSMikulas Patocka * Parse cache data device (be it pmem or ssd) 204648debafeSMikulas Patocka */ 204748debafeSMikulas Patocka string = dm_shift_arg(&as); 204848debafeSMikulas Patocka if (!string) 204948debafeSMikulas Patocka goto bad_arguments; 205048debafeSMikulas Patocka 205148debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev); 205248debafeSMikulas Patocka if (r) { 205348debafeSMikulas Patocka ti->error = "Cache data device lookup failed"; 205448debafeSMikulas Patocka goto bad; 205548debafeSMikulas Patocka } 205648debafeSMikulas Patocka wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode); 205748debafeSMikulas Patocka 205848debafeSMikulas Patocka /* 205948debafeSMikulas Patocka * Parse the cache block size 206048debafeSMikulas Patocka */ 206148debafeSMikulas Patocka string = dm_shift_arg(&as); 206248debafeSMikulas Patocka if (!string) 206348debafeSMikulas Patocka goto bad_arguments; 206448debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 || 206548debafeSMikulas Patocka wc->block_size < 512 || wc->block_size > PAGE_SIZE || 206648debafeSMikulas Patocka (wc->block_size & (wc->block_size - 1))) { 206748debafeSMikulas Patocka r = -EINVAL; 206848debafeSMikulas Patocka ti->error = "Invalid block size"; 206948debafeSMikulas Patocka goto bad; 207048debafeSMikulas Patocka } 207148debafeSMikulas Patocka wc->block_size_bits = __ffs(wc->block_size); 207248debafeSMikulas Patocka 207348debafeSMikulas Patocka wc->max_writeback_jobs = MAX_WRITEBACK_JOBS; 207448debafeSMikulas Patocka wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? 
AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM; 207548debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC); 207648debafeSMikulas Patocka 207748debafeSMikulas Patocka /* 207848debafeSMikulas Patocka * Parse optional arguments 207948debafeSMikulas Patocka */ 208048debafeSMikulas Patocka r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); 208148debafeSMikulas Patocka if (r) 208248debafeSMikulas Patocka goto bad; 208348debafeSMikulas Patocka 208448debafeSMikulas Patocka while (opt_params) { 208548debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2086d284f824SMikulas Patocka if (!strcasecmp(string, "start_sector") && opt_params >= 1) { 2087d284f824SMikulas Patocka unsigned long long start_sector; 2088d284f824SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2089d284f824SMikulas Patocka if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) 2090d284f824SMikulas Patocka goto invalid_optional; 2091d284f824SMikulas Patocka wc->start_sector = start_sector; 2092d284f824SMikulas Patocka if (wc->start_sector != start_sector || 2093d284f824SMikulas Patocka wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) 2094d284f824SMikulas Patocka goto invalid_optional; 2095d284f824SMikulas Patocka } else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) { 209648debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 209748debafeSMikulas Patocka if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1) 209848debafeSMikulas Patocka goto invalid_optional; 209948debafeSMikulas Patocka if (high_wm_percent < 0 || high_wm_percent > 100) 210048debafeSMikulas Patocka goto invalid_optional; 210148debafeSMikulas Patocka wc->high_wm_percent_set = true; 210248debafeSMikulas Patocka } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { 210348debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 210448debafeSMikulas Patocka if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1) 
210548debafeSMikulas Patocka goto invalid_optional; 210648debafeSMikulas Patocka if (low_wm_percent < 0 || low_wm_percent > 100) 210748debafeSMikulas Patocka goto invalid_optional; 210848debafeSMikulas Patocka wc->low_wm_percent_set = true; 210948debafeSMikulas Patocka } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { 211048debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 211148debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1) 211248debafeSMikulas Patocka goto invalid_optional; 211348debafeSMikulas Patocka wc->max_writeback_jobs_set = true; 211448debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) { 211548debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 211648debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1) 211748debafeSMikulas Patocka goto invalid_optional; 211848debafeSMikulas Patocka wc->autocommit_blocks_set = true; 211948debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) { 212048debafeSMikulas Patocka unsigned autocommit_msecs; 212148debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 212248debafeSMikulas Patocka if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1) 212348debafeSMikulas Patocka goto invalid_optional; 212448debafeSMikulas Patocka if (autocommit_msecs > 3600000) 212548debafeSMikulas Patocka goto invalid_optional; 212648debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); 212748debafeSMikulas Patocka wc->autocommit_time_set = true; 2128*3923d485SMikulas Patocka } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { 2129*3923d485SMikulas Patocka unsigned max_age_msecs; 2130*3923d485SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2131*3923d485SMikulas Patocka if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1) 2132*3923d485SMikulas Patocka goto invalid_optional; 
2133*3923d485SMikulas Patocka if (max_age_msecs > 86400000) 2134*3923d485SMikulas Patocka goto invalid_optional; 2135*3923d485SMikulas Patocka wc->max_age = msecs_to_jiffies(max_age_msecs); 213693de44ebSMikulas Patocka } else if (!strcasecmp(string, "cleaner")) { 213793de44ebSMikulas Patocka wc->cleaner = true; 213848debafeSMikulas Patocka } else if (!strcasecmp(string, "fua")) { 213948debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 214048debafeSMikulas Patocka wc->writeback_fua = true; 214148debafeSMikulas Patocka wc->writeback_fua_set = true; 214248debafeSMikulas Patocka } else goto invalid_optional; 214348debafeSMikulas Patocka } else if (!strcasecmp(string, "nofua")) { 214448debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 214548debafeSMikulas Patocka wc->writeback_fua = false; 214648debafeSMikulas Patocka wc->writeback_fua_set = true; 214748debafeSMikulas Patocka } else goto invalid_optional; 214848debafeSMikulas Patocka } else { 214948debafeSMikulas Patocka invalid_optional: 215048debafeSMikulas Patocka r = -EINVAL; 215148debafeSMikulas Patocka ti->error = "Invalid optional argument"; 215248debafeSMikulas Patocka goto bad; 215348debafeSMikulas Patocka } 215448debafeSMikulas Patocka } 215548debafeSMikulas Patocka 215648debafeSMikulas Patocka if (high_wm_percent < low_wm_percent) { 215748debafeSMikulas Patocka r = -EINVAL; 215848debafeSMikulas Patocka ti->error = "High watermark must be greater than or equal to low watermark"; 215948debafeSMikulas Patocka goto bad; 216048debafeSMikulas Patocka } 216148debafeSMikulas Patocka 2162d284f824SMikulas Patocka if (WC_MODE_PMEM(wc)) { 2163d284f824SMikulas Patocka r = persistent_memory_claim(wc); 2164d284f824SMikulas Patocka if (r) { 2165d284f824SMikulas Patocka ti->error = "Unable to map persistent memory for cache"; 2166d284f824SMikulas Patocka goto bad; 2167d284f824SMikulas Patocka } 2168d284f824SMikulas Patocka } else { 216948debafeSMikulas Patocka struct dm_io_region region; 217048debafeSMikulas Patocka struct 
dm_io_request req; 217148debafeSMikulas Patocka size_t n_blocks, n_metadata_blocks; 217248debafeSMikulas Patocka uint64_t n_bitmap_bits; 217348debafeSMikulas Patocka 2174d284f824SMikulas Patocka wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT; 2175d284f824SMikulas Patocka 217648debafeSMikulas Patocka bio_list_init(&wc->flush_list); 217748debafeSMikulas Patocka wc->flush_thread = kthread_create(writecache_flush_thread, wc, "dm_writecache_flush"); 217848debafeSMikulas Patocka if (IS_ERR(wc->flush_thread)) { 217948debafeSMikulas Patocka r = PTR_ERR(wc->flush_thread); 218048debafeSMikulas Patocka wc->flush_thread = NULL; 2181e8ea141aSShenghui Wang ti->error = "Couldn't spawn flush thread"; 218248debafeSMikulas Patocka goto bad; 218348debafeSMikulas Patocka } 218448debafeSMikulas Patocka wake_up_process(wc->flush_thread); 218548debafeSMikulas Patocka 218648debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, 218748debafeSMikulas Patocka &n_blocks, &n_metadata_blocks); 218848debafeSMikulas Patocka if (r) { 218948debafeSMikulas Patocka ti->error = "Invalid device size"; 219048debafeSMikulas Patocka goto bad; 219148debafeSMikulas Patocka } 219248debafeSMikulas Patocka 219348debafeSMikulas Patocka n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + 219448debafeSMikulas Patocka BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY; 219548debafeSMikulas Patocka /* this is limitation of test_bit functions */ 219648debafeSMikulas Patocka if (n_bitmap_bits > 1U << 31) { 219748debafeSMikulas Patocka r = -EFBIG; 219848debafeSMikulas Patocka ti->error = "Invalid device size"; 219948debafeSMikulas Patocka goto bad; 220048debafeSMikulas Patocka } 220148debafeSMikulas Patocka 220248debafeSMikulas Patocka wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits); 220348debafeSMikulas Patocka if (!wc->memory_map) { 220448debafeSMikulas Patocka r = -ENOMEM; 220548debafeSMikulas Patocka ti->error = "Unable to allocate 
memory for metadata"; 220648debafeSMikulas Patocka goto bad; 220748debafeSMikulas Patocka } 220848debafeSMikulas Patocka 220948debafeSMikulas Patocka wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle); 221048debafeSMikulas Patocka if (IS_ERR(wc->dm_kcopyd)) { 221148debafeSMikulas Patocka r = PTR_ERR(wc->dm_kcopyd); 221248debafeSMikulas Patocka ti->error = "Unable to allocate dm-kcopyd client"; 221348debafeSMikulas Patocka wc->dm_kcopyd = NULL; 221448debafeSMikulas Patocka goto bad; 221548debafeSMikulas Patocka } 221648debafeSMikulas Patocka 221748debafeSMikulas Patocka wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT); 221848debafeSMikulas Patocka wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / 221948debafeSMikulas Patocka BITS_PER_LONG * sizeof(unsigned long); 222048debafeSMikulas Patocka wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size); 222148debafeSMikulas Patocka if (!wc->dirty_bitmap) { 222248debafeSMikulas Patocka r = -ENOMEM; 222348debafeSMikulas Patocka ti->error = "Unable to allocate dirty bitmap"; 222448debafeSMikulas Patocka goto bad; 222548debafeSMikulas Patocka } 222648debafeSMikulas Patocka 222748debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 2228d284f824SMikulas Patocka region.sector = wc->start_sector; 222948debafeSMikulas Patocka region.count = wc->metadata_sectors; 223048debafeSMikulas Patocka req.bi_op = REQ_OP_READ; 223148debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 223248debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 223348debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map; 223448debafeSMikulas Patocka req.client = wc->dm_io; 223548debafeSMikulas Patocka req.notify.fn = NULL; 223648debafeSMikulas Patocka 223748debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 223848debafeSMikulas Patocka if (r) { 223948debafeSMikulas Patocka ti->error = "Unable to read metadata"; 224048debafeSMikulas Patocka goto bad; 224148debafeSMikulas Patocka } 224248debafeSMikulas 
Patocka } 224348debafeSMikulas Patocka 224448debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 224548debafeSMikulas Patocka if (r) { 224648debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 224748debafeSMikulas Patocka goto bad; 224848debafeSMikulas Patocka } 224948debafeSMikulas Patocka if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) { 225048debafeSMikulas Patocka r = init_memory(wc); 225148debafeSMikulas Patocka if (r) { 225248debafeSMikulas Patocka ti->error = "Unable to initialize device"; 225348debafeSMikulas Patocka goto bad; 225448debafeSMikulas Patocka } 225548debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 225648debafeSMikulas Patocka if (r) { 225748debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 225848debafeSMikulas Patocka goto bad; 225948debafeSMikulas Patocka } 226048debafeSMikulas Patocka } 226148debafeSMikulas Patocka 226248debafeSMikulas Patocka if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) { 226348debafeSMikulas Patocka ti->error = "Invalid magic in the superblock"; 226448debafeSMikulas Patocka r = -EINVAL; 226548debafeSMikulas Patocka goto bad; 226648debafeSMikulas Patocka } 226748debafeSMikulas Patocka 226848debafeSMikulas Patocka if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) { 226948debafeSMikulas Patocka ti->error = "Invalid version in the superblock"; 227048debafeSMikulas Patocka r = -EINVAL; 227148debafeSMikulas Patocka goto bad; 227248debafeSMikulas Patocka } 227348debafeSMikulas Patocka 227448debafeSMikulas Patocka if (le32_to_cpu(s.block_size) != wc->block_size) { 227548debafeSMikulas Patocka ti->error = "Block size does not match superblock"; 227648debafeSMikulas Patocka r = -EINVAL; 227748debafeSMikulas Patocka goto bad; 227848debafeSMikulas Patocka } 227948debafeSMikulas Patocka 228048debafeSMikulas Patocka wc->n_blocks = le64_to_cpu(s.n_blocks); 
228148debafeSMikulas Patocka 228248debafeSMikulas Patocka offset = wc->n_blocks * sizeof(struct wc_memory_entry); 228348debafeSMikulas Patocka if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) { 228448debafeSMikulas Patocka overflow: 228548debafeSMikulas Patocka ti->error = "Overflow in size calculation"; 228648debafeSMikulas Patocka r = -EINVAL; 228748debafeSMikulas Patocka goto bad; 228848debafeSMikulas Patocka } 228948debafeSMikulas Patocka offset += sizeof(struct wc_memory_superblock); 229048debafeSMikulas Patocka if (offset < sizeof(struct wc_memory_superblock)) 229148debafeSMikulas Patocka goto overflow; 229248debafeSMikulas Patocka offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1); 229348debafeSMikulas Patocka data_size = wc->n_blocks * (size_t)wc->block_size; 229448debafeSMikulas Patocka if (!offset || (data_size / wc->block_size != wc->n_blocks) || 229548debafeSMikulas Patocka (offset + data_size < offset)) 229648debafeSMikulas Patocka goto overflow; 229748debafeSMikulas Patocka if (offset + data_size > wc->memory_map_size) { 229848debafeSMikulas Patocka ti->error = "Memory area is too small"; 229948debafeSMikulas Patocka r = -EINVAL; 230048debafeSMikulas Patocka goto bad; 230148debafeSMikulas Patocka } 230248debafeSMikulas Patocka 230348debafeSMikulas Patocka wc->metadata_sectors = offset >> SECTOR_SHIFT; 230448debafeSMikulas Patocka wc->block_start = (char *)sb(wc) + offset; 230548debafeSMikulas Patocka 230648debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - high_wm_percent); 230748debafeSMikulas Patocka x += 50; 230848debafeSMikulas Patocka do_div(x, 100); 230948debafeSMikulas Patocka wc->freelist_high_watermark = x; 231048debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - low_wm_percent); 231148debafeSMikulas Patocka x += 50; 231248debafeSMikulas Patocka do_div(x, 100); 231348debafeSMikulas Patocka wc->freelist_low_watermark = x; 231448debafeSMikulas Patocka 231593de44ebSMikulas Patocka 
if (wc->cleaner) 231693de44ebSMikulas Patocka activate_cleaner(wc); 231793de44ebSMikulas Patocka 231848debafeSMikulas Patocka r = writecache_alloc_entries(wc); 231948debafeSMikulas Patocka if (r) { 232048debafeSMikulas Patocka ti->error = "Cannot allocate memory"; 232148debafeSMikulas Patocka goto bad; 232248debafeSMikulas Patocka } 232348debafeSMikulas Patocka 232448debafeSMikulas Patocka ti->num_flush_bios = 1; 232548debafeSMikulas Patocka ti->flush_supported = true; 232648debafeSMikulas Patocka ti->num_discard_bios = 1; 232748debafeSMikulas Patocka 232848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 232948debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 233048debafeSMikulas Patocka 233148debafeSMikulas Patocka return 0; 233248debafeSMikulas Patocka 233348debafeSMikulas Patocka bad_arguments: 233448debafeSMikulas Patocka r = -EINVAL; 233548debafeSMikulas Patocka ti->error = "Bad arguments"; 233648debafeSMikulas Patocka bad: 233748debafeSMikulas Patocka writecache_dtr(ti); 233848debafeSMikulas Patocka return r; 233948debafeSMikulas Patocka } 234048debafeSMikulas Patocka 234148debafeSMikulas Patocka static void writecache_status(struct dm_target *ti, status_type_t type, 234248debafeSMikulas Patocka unsigned status_flags, char *result, unsigned maxlen) 234348debafeSMikulas Patocka { 234448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 234548debafeSMikulas Patocka unsigned extra_args; 234648debafeSMikulas Patocka unsigned sz = 0; 234748debafeSMikulas Patocka uint64_t x; 234848debafeSMikulas Patocka 234948debafeSMikulas Patocka switch (type) { 235048debafeSMikulas Patocka case STATUSTYPE_INFO: 235148debafeSMikulas Patocka DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), 235248debafeSMikulas Patocka (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size, 235348debafeSMikulas Patocka (unsigned long long)wc->writeback_size); 235448debafeSMikulas Patocka break; 235548debafeSMikulas Patocka 
case STATUSTYPE_TABLE: 235648debafeSMikulas Patocka DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', 235748debafeSMikulas Patocka wc->dev->name, wc->ssd_dev->name, wc->block_size); 235848debafeSMikulas Patocka extra_args = 0; 23599ff07e7dSMikulas Patocka if (wc->start_sector) 23609ff07e7dSMikulas Patocka extra_args += 2; 236193de44ebSMikulas Patocka if (wc->high_wm_percent_set && !wc->cleaner) 236248debafeSMikulas Patocka extra_args += 2; 236393de44ebSMikulas Patocka if (wc->low_wm_percent_set && !wc->cleaner) 236448debafeSMikulas Patocka extra_args += 2; 236548debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 236648debafeSMikulas Patocka extra_args += 2; 236748debafeSMikulas Patocka if (wc->autocommit_blocks_set) 236848debafeSMikulas Patocka extra_args += 2; 236948debafeSMikulas Patocka if (wc->autocommit_time_set) 237048debafeSMikulas Patocka extra_args += 2; 237193de44ebSMikulas Patocka if (wc->cleaner) 237293de44ebSMikulas Patocka extra_args++; 237348debafeSMikulas Patocka if (wc->writeback_fua_set) 237448debafeSMikulas Patocka extra_args++; 237548debafeSMikulas Patocka 237648debafeSMikulas Patocka DMEMIT("%u", extra_args); 23779ff07e7dSMikulas Patocka if (wc->start_sector) 23789ff07e7dSMikulas Patocka DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); 237993de44ebSMikulas Patocka if (wc->high_wm_percent_set && !wc->cleaner) { 238048debafeSMikulas Patocka x = (uint64_t)wc->freelist_high_watermark * 100; 238148debafeSMikulas Patocka x += wc->n_blocks / 2; 238248debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 238348debafeSMikulas Patocka DMEMIT(" high_watermark %u", 100 - (unsigned)x); 238448debafeSMikulas Patocka } 238593de44ebSMikulas Patocka if (wc->low_wm_percent_set && !wc->cleaner) { 238648debafeSMikulas Patocka x = (uint64_t)wc->freelist_low_watermark * 100; 238748debafeSMikulas Patocka x += wc->n_blocks / 2; 238848debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 238948debafeSMikulas Patocka DMEMIT(" low_watermark 
%u", 100 - (unsigned)x); 239048debafeSMikulas Patocka } 239148debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 239248debafeSMikulas Patocka DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); 239348debafeSMikulas Patocka if (wc->autocommit_blocks_set) 239448debafeSMikulas Patocka DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); 239548debafeSMikulas Patocka if (wc->autocommit_time_set) 239648debafeSMikulas Patocka DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); 2397*3923d485SMikulas Patocka if (wc->max_age != MAX_AGE_UNSPECIFIED) 2398*3923d485SMikulas Patocka DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age)); 239993de44ebSMikulas Patocka if (wc->cleaner) 240093de44ebSMikulas Patocka DMEMIT(" cleaner"); 240148debafeSMikulas Patocka if (wc->writeback_fua_set) 240248debafeSMikulas Patocka DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); 240348debafeSMikulas Patocka break; 240448debafeSMikulas Patocka } 240548debafeSMikulas Patocka } 240648debafeSMikulas Patocka 240748debafeSMikulas Patocka static struct target_type writecache_target = { 240848debafeSMikulas Patocka .name = "writecache", 240993de44ebSMikulas Patocka .version = {1, 3, 0}, 241048debafeSMikulas Patocka .module = THIS_MODULE, 241148debafeSMikulas Patocka .ctr = writecache_ctr, 241248debafeSMikulas Patocka .dtr = writecache_dtr, 241348debafeSMikulas Patocka .status = writecache_status, 241448debafeSMikulas Patocka .postsuspend = writecache_suspend, 241548debafeSMikulas Patocka .resume = writecache_resume, 241648debafeSMikulas Patocka .message = writecache_message, 241748debafeSMikulas Patocka .map = writecache_map, 241848debafeSMikulas Patocka .end_io = writecache_end_io, 241948debafeSMikulas Patocka .iterate_devices = writecache_iterate_devices, 242048debafeSMikulas Patocka .io_hints = writecache_io_hints, 242148debafeSMikulas Patocka }; 242248debafeSMikulas Patocka 242348debafeSMikulas Patocka static int __init dm_writecache_init(void) 242448debafeSMikulas Patocka 
{ 242548debafeSMikulas Patocka int r; 242648debafeSMikulas Patocka 242748debafeSMikulas Patocka r = dm_register_target(&writecache_target); 242848debafeSMikulas Patocka if (r < 0) { 242948debafeSMikulas Patocka DMERR("register failed %d", r); 243048debafeSMikulas Patocka return r; 243148debafeSMikulas Patocka } 243248debafeSMikulas Patocka 243348debafeSMikulas Patocka return 0; 243448debafeSMikulas Patocka } 243548debafeSMikulas Patocka 243648debafeSMikulas Patocka static void __exit dm_writecache_exit(void) 243748debafeSMikulas Patocka { 243848debafeSMikulas Patocka dm_unregister_target(&writecache_target); 243948debafeSMikulas Patocka } 244048debafeSMikulas Patocka 244148debafeSMikulas Patocka module_init(dm_writecache_init); 244248debafeSMikulas Patocka module_exit(dm_writecache_exit); 244348debafeSMikulas Patocka 244448debafeSMikulas Patocka MODULE_DESCRIPTION(DM_NAME " writecache target"); 244548debafeSMikulas Patocka MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); 244648debafeSMikulas Patocka MODULE_LICENSE("GPL"); 2447