148debafeSMikulas Patocka // SPDX-License-Identifier: GPL-2.0 248debafeSMikulas Patocka /* 348debafeSMikulas Patocka * Copyright (C) 2018 Red Hat. All rights reserved. 448debafeSMikulas Patocka * 548debafeSMikulas Patocka * This file is released under the GPL. 648debafeSMikulas Patocka */ 748debafeSMikulas Patocka 848debafeSMikulas Patocka #include <linux/device-mapper.h> 948debafeSMikulas Patocka #include <linux/module.h> 1048debafeSMikulas Patocka #include <linux/init.h> 1148debafeSMikulas Patocka #include <linux/vmalloc.h> 1248debafeSMikulas Patocka #include <linux/kthread.h> 1348debafeSMikulas Patocka #include <linux/dm-io.h> 1448debafeSMikulas Patocka #include <linux/dm-kcopyd.h> 1548debafeSMikulas Patocka #include <linux/dax.h> 1648debafeSMikulas Patocka #include <linux/pfn_t.h> 1748debafeSMikulas Patocka #include <linux/libnvdimm.h> 1848debafeSMikulas Patocka 1948debafeSMikulas Patocka #define DM_MSG_PREFIX "writecache" 2048debafeSMikulas Patocka 2148debafeSMikulas Patocka #define HIGH_WATERMARK 50 2248debafeSMikulas Patocka #define LOW_WATERMARK 45 2348debafeSMikulas Patocka #define MAX_WRITEBACK_JOBS 0 2448debafeSMikulas Patocka #define ENDIO_LATENCY 16 2548debafeSMikulas Patocka #define WRITEBACK_LATENCY 64 2648debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_SSD 65536 2748debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_PMEM 64 2848debafeSMikulas Patocka #define AUTOCOMMIT_MSEC 1000 2948debafeSMikulas Patocka 3048debafeSMikulas Patocka #define BITMAP_GRANULARITY 65536 3148debafeSMikulas Patocka #if BITMAP_GRANULARITY < PAGE_SIZE 3248debafeSMikulas Patocka #undef BITMAP_GRANULARITY 3348debafeSMikulas Patocka #define BITMAP_GRANULARITY PAGE_SIZE 3448debafeSMikulas Patocka #endif 3548debafeSMikulas Patocka 3648debafeSMikulas Patocka #if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER) 3748debafeSMikulas Patocka #define DM_WRITECACHE_HAS_PMEM 3848debafeSMikulas Patocka #endif 3948debafeSMikulas Patocka 4048debafeSMikulas Patocka #ifdef 
DM_WRITECACHE_HAS_PMEM 4148debafeSMikulas Patocka #define pmem_assign(dest, src) \ 4248debafeSMikulas Patocka do { \ 4348debafeSMikulas Patocka typeof(dest) uniq = (src); \ 4448debafeSMikulas Patocka memcpy_flushcache(&(dest), &uniq, sizeof(dest)); \ 4548debafeSMikulas Patocka } while (0) 4648debafeSMikulas Patocka #else 4748debafeSMikulas Patocka #define pmem_assign(dest, src) ((dest) = (src)) 4848debafeSMikulas Patocka #endif 4948debafeSMikulas Patocka 5048debafeSMikulas Patocka #if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) 5148debafeSMikulas Patocka #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 5248debafeSMikulas Patocka #endif 5348debafeSMikulas Patocka 5448debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_MAGIC 0x23489321 5548debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_VERSION 1 5648debafeSMikulas Patocka 5748debafeSMikulas Patocka struct wc_memory_entry { 5848debafeSMikulas Patocka __le64 original_sector; 5948debafeSMikulas Patocka __le64 seq_count; 6048debafeSMikulas Patocka }; 6148debafeSMikulas Patocka 6248debafeSMikulas Patocka struct wc_memory_superblock { 6348debafeSMikulas Patocka union { 6448debafeSMikulas Patocka struct { 6548debafeSMikulas Patocka __le32 magic; 6648debafeSMikulas Patocka __le32 version; 6748debafeSMikulas Patocka __le32 block_size; 6848debafeSMikulas Patocka __le32 pad; 6948debafeSMikulas Patocka __le64 n_blocks; 7048debafeSMikulas Patocka __le64 seq_count; 7148debafeSMikulas Patocka }; 7248debafeSMikulas Patocka __le64 padding[8]; 7348debafeSMikulas Patocka }; 7448debafeSMikulas Patocka struct wc_memory_entry entries[0]; 7548debafeSMikulas Patocka }; 7648debafeSMikulas Patocka 7748debafeSMikulas Patocka struct wc_entry { 7848debafeSMikulas Patocka struct rb_node rb_node; 7948debafeSMikulas Patocka struct list_head lru; 8048debafeSMikulas Patocka unsigned short wc_list_contiguous; 8148debafeSMikulas Patocka bool write_in_progress 8248debafeSMikulas Patocka #if BITS_PER_LONG == 64 8348debafeSMikulas 
Patocka :1 8448debafeSMikulas Patocka #endif 8548debafeSMikulas Patocka ; 8648debafeSMikulas Patocka unsigned long index 8748debafeSMikulas Patocka #if BITS_PER_LONG == 64 8848debafeSMikulas Patocka :47 8948debafeSMikulas Patocka #endif 9048debafeSMikulas Patocka ; 9148debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 9248debafeSMikulas Patocka uint64_t original_sector; 9348debafeSMikulas Patocka uint64_t seq_count; 9448debafeSMikulas Patocka #endif 9548debafeSMikulas Patocka }; 9648debafeSMikulas Patocka 9748debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 9848debafeSMikulas Patocka #define WC_MODE_PMEM(wc) ((wc)->pmem_mode) 9948debafeSMikulas Patocka #define WC_MODE_FUA(wc) ((wc)->writeback_fua) 10048debafeSMikulas Patocka #else 10148debafeSMikulas Patocka #define WC_MODE_PMEM(wc) false 10248debafeSMikulas Patocka #define WC_MODE_FUA(wc) false 10348debafeSMikulas Patocka #endif 10448debafeSMikulas Patocka #define WC_MODE_SORT_FREELIST(wc) (!WC_MODE_PMEM(wc)) 10548debafeSMikulas Patocka 10648debafeSMikulas Patocka struct dm_writecache { 10748debafeSMikulas Patocka struct mutex lock; 10848debafeSMikulas Patocka struct list_head lru; 10948debafeSMikulas Patocka union { 11048debafeSMikulas Patocka struct list_head freelist; 11148debafeSMikulas Patocka struct { 11248debafeSMikulas Patocka struct rb_root freetree; 11348debafeSMikulas Patocka struct wc_entry *current_free; 11448debafeSMikulas Patocka }; 11548debafeSMikulas Patocka }; 11648debafeSMikulas Patocka struct rb_root tree; 11748debafeSMikulas Patocka 11848debafeSMikulas Patocka size_t freelist_size; 11948debafeSMikulas Patocka size_t writeback_size; 12048debafeSMikulas Patocka size_t freelist_high_watermark; 12148debafeSMikulas Patocka size_t freelist_low_watermark; 12248debafeSMikulas Patocka 12348debafeSMikulas Patocka unsigned uncommitted_blocks; 12448debafeSMikulas Patocka unsigned autocommit_blocks; 12548debafeSMikulas Patocka unsigned max_writeback_jobs; 12648debafeSMikulas Patocka 
12748debafeSMikulas Patocka int error; 12848debafeSMikulas Patocka 12948debafeSMikulas Patocka unsigned long autocommit_jiffies; 13048debafeSMikulas Patocka struct timer_list autocommit_timer; 13148debafeSMikulas Patocka struct wait_queue_head freelist_wait; 13248debafeSMikulas Patocka 13348debafeSMikulas Patocka atomic_t bio_in_progress[2]; 13448debafeSMikulas Patocka struct wait_queue_head bio_in_progress_wait[2]; 13548debafeSMikulas Patocka 13648debafeSMikulas Patocka struct dm_target *ti; 13748debafeSMikulas Patocka struct dm_dev *dev; 13848debafeSMikulas Patocka struct dm_dev *ssd_dev; 139d284f824SMikulas Patocka sector_t start_sector; 14048debafeSMikulas Patocka void *memory_map; 14148debafeSMikulas Patocka uint64_t memory_map_size; 14248debafeSMikulas Patocka size_t metadata_sectors; 14348debafeSMikulas Patocka size_t n_blocks; 14448debafeSMikulas Patocka uint64_t seq_count; 14548debafeSMikulas Patocka void *block_start; 14648debafeSMikulas Patocka struct wc_entry *entries; 14748debafeSMikulas Patocka unsigned block_size; 14848debafeSMikulas Patocka unsigned char block_size_bits; 14948debafeSMikulas Patocka 15048debafeSMikulas Patocka bool pmem_mode:1; 15148debafeSMikulas Patocka bool writeback_fua:1; 15248debafeSMikulas Patocka 15348debafeSMikulas Patocka bool overwrote_committed:1; 15448debafeSMikulas Patocka bool memory_vmapped:1; 15548debafeSMikulas Patocka 15648debafeSMikulas Patocka bool high_wm_percent_set:1; 15748debafeSMikulas Patocka bool low_wm_percent_set:1; 15848debafeSMikulas Patocka bool max_writeback_jobs_set:1; 15948debafeSMikulas Patocka bool autocommit_blocks_set:1; 16048debafeSMikulas Patocka bool autocommit_time_set:1; 16148debafeSMikulas Patocka bool writeback_fua_set:1; 16248debafeSMikulas Patocka bool flush_on_suspend:1; 16348debafeSMikulas Patocka 16448debafeSMikulas Patocka unsigned writeback_all; 16548debafeSMikulas Patocka struct workqueue_struct *writeback_wq; 16648debafeSMikulas Patocka struct work_struct writeback_work; 
16748debafeSMikulas Patocka struct work_struct flush_work; 16848debafeSMikulas Patocka 16948debafeSMikulas Patocka struct dm_io_client *dm_io; 17048debafeSMikulas Patocka 17148debafeSMikulas Patocka raw_spinlock_t endio_list_lock; 17248debafeSMikulas Patocka struct list_head endio_list; 17348debafeSMikulas Patocka struct task_struct *endio_thread; 17448debafeSMikulas Patocka 17548debafeSMikulas Patocka struct task_struct *flush_thread; 17648debafeSMikulas Patocka struct bio_list flush_list; 17748debafeSMikulas Patocka 17848debafeSMikulas Patocka struct dm_kcopyd_client *dm_kcopyd; 17948debafeSMikulas Patocka unsigned long *dirty_bitmap; 18048debafeSMikulas Patocka unsigned dirty_bitmap_size; 18148debafeSMikulas Patocka 18248debafeSMikulas Patocka struct bio_set bio_set; 18348debafeSMikulas Patocka mempool_t copy_pool; 18448debafeSMikulas Patocka }; 18548debafeSMikulas Patocka 18648debafeSMikulas Patocka #define WB_LIST_INLINE 16 18748debafeSMikulas Patocka 18848debafeSMikulas Patocka struct writeback_struct { 18948debafeSMikulas Patocka struct list_head endio_entry; 19048debafeSMikulas Patocka struct dm_writecache *wc; 19148debafeSMikulas Patocka struct wc_entry **wc_list; 19248debafeSMikulas Patocka unsigned wc_list_n; 19348debafeSMikulas Patocka struct wc_entry *wc_list_inline[WB_LIST_INLINE]; 19448debafeSMikulas Patocka struct bio bio; 19548debafeSMikulas Patocka }; 19648debafeSMikulas Patocka 19748debafeSMikulas Patocka struct copy_struct { 19848debafeSMikulas Patocka struct list_head endio_entry; 19948debafeSMikulas Patocka struct dm_writecache *wc; 20048debafeSMikulas Patocka struct wc_entry *e; 20148debafeSMikulas Patocka unsigned n_entries; 20248debafeSMikulas Patocka int error; 20348debafeSMikulas Patocka }; 20448debafeSMikulas Patocka 20548debafeSMikulas Patocka DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle, 20648debafeSMikulas Patocka "A percentage of time allocated for data copying"); 20748debafeSMikulas Patocka 20848debafeSMikulas 
Patocka static void wc_lock(struct dm_writecache *wc) 20948debafeSMikulas Patocka { 21048debafeSMikulas Patocka mutex_lock(&wc->lock); 21148debafeSMikulas Patocka } 21248debafeSMikulas Patocka 21348debafeSMikulas Patocka static void wc_unlock(struct dm_writecache *wc) 21448debafeSMikulas Patocka { 21548debafeSMikulas Patocka mutex_unlock(&wc->lock); 21648debafeSMikulas Patocka } 21748debafeSMikulas Patocka 21848debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 21948debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 22048debafeSMikulas Patocka { 22148debafeSMikulas Patocka int r; 22248debafeSMikulas Patocka loff_t s; 22348debafeSMikulas Patocka long p, da; 22448debafeSMikulas Patocka pfn_t pfn; 22548debafeSMikulas Patocka int id; 22648debafeSMikulas Patocka struct page **pages; 22748debafeSMikulas Patocka 22848debafeSMikulas Patocka wc->memory_vmapped = false; 22948debafeSMikulas Patocka 23048debafeSMikulas Patocka if (!wc->ssd_dev->dax_dev) { 23148debafeSMikulas Patocka r = -EOPNOTSUPP; 23248debafeSMikulas Patocka goto err1; 23348debafeSMikulas Patocka } 23448debafeSMikulas Patocka s = wc->memory_map_size; 23548debafeSMikulas Patocka p = s >> PAGE_SHIFT; 23648debafeSMikulas Patocka if (!p) { 23748debafeSMikulas Patocka r = -EINVAL; 23848debafeSMikulas Patocka goto err1; 23948debafeSMikulas Patocka } 24048debafeSMikulas Patocka if (p != s >> PAGE_SHIFT) { 24148debafeSMikulas Patocka r = -EOVERFLOW; 24248debafeSMikulas Patocka goto err1; 24348debafeSMikulas Patocka } 24448debafeSMikulas Patocka 24548debafeSMikulas Patocka id = dax_read_lock(); 24648debafeSMikulas Patocka 24748debafeSMikulas Patocka da = dax_direct_access(wc->ssd_dev->dax_dev, 0, p, &wc->memory_map, &pfn); 24848debafeSMikulas Patocka if (da < 0) { 24948debafeSMikulas Patocka wc->memory_map = NULL; 25048debafeSMikulas Patocka r = da; 25148debafeSMikulas Patocka goto err2; 25248debafeSMikulas Patocka } 25348debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 
25448debafeSMikulas Patocka wc->memory_map = NULL; 25548debafeSMikulas Patocka r = -EOPNOTSUPP; 25648debafeSMikulas Patocka goto err2; 25748debafeSMikulas Patocka } 25848debafeSMikulas Patocka if (da != p) { 25948debafeSMikulas Patocka long i; 26048debafeSMikulas Patocka wc->memory_map = NULL; 26150a7d3baSKees Cook pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); 26248debafeSMikulas Patocka if (!pages) { 26348debafeSMikulas Patocka r = -ENOMEM; 26448debafeSMikulas Patocka goto err2; 26548debafeSMikulas Patocka } 26648debafeSMikulas Patocka i = 0; 26748debafeSMikulas Patocka do { 26848debafeSMikulas Patocka long daa; 26948debafeSMikulas Patocka daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i, 270f742267aSHuaisheng Ye NULL, &pfn); 27148debafeSMikulas Patocka if (daa <= 0) { 27248debafeSMikulas Patocka r = daa ? daa : -EINVAL; 27348debafeSMikulas Patocka goto err3; 27448debafeSMikulas Patocka } 27548debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 27648debafeSMikulas Patocka r = -EOPNOTSUPP; 27748debafeSMikulas Patocka goto err3; 27848debafeSMikulas Patocka } 27948debafeSMikulas Patocka while (daa-- && i < p) { 28048debafeSMikulas Patocka pages[i++] = pfn_t_to_page(pfn); 28148debafeSMikulas Patocka pfn.val++; 28248debafeSMikulas Patocka } 28348debafeSMikulas Patocka } while (i < p); 28448debafeSMikulas Patocka wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); 28548debafeSMikulas Patocka if (!wc->memory_map) { 28648debafeSMikulas Patocka r = -ENOMEM; 28748debafeSMikulas Patocka goto err3; 28848debafeSMikulas Patocka } 28948debafeSMikulas Patocka kvfree(pages); 29048debafeSMikulas Patocka wc->memory_vmapped = true; 29148debafeSMikulas Patocka } 29248debafeSMikulas Patocka 29348debafeSMikulas Patocka dax_read_unlock(id); 294d284f824SMikulas Patocka 295d284f824SMikulas Patocka wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT; 296d284f824SMikulas Patocka wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT; 297d284f824SMikulas 
Patocka 29848debafeSMikulas Patocka return 0; 29948debafeSMikulas Patocka err3: 30048debafeSMikulas Patocka kvfree(pages); 30148debafeSMikulas Patocka err2: 30248debafeSMikulas Patocka dax_read_unlock(id); 30348debafeSMikulas Patocka err1: 30448debafeSMikulas Patocka return r; 30548debafeSMikulas Patocka } 30648debafeSMikulas Patocka #else 30748debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 30848debafeSMikulas Patocka { 30948debafeSMikulas Patocka BUG(); 31048debafeSMikulas Patocka } 31148debafeSMikulas Patocka #endif 31248debafeSMikulas Patocka 31348debafeSMikulas Patocka static void persistent_memory_release(struct dm_writecache *wc) 31448debafeSMikulas Patocka { 31548debafeSMikulas Patocka if (wc->memory_vmapped) 316d284f824SMikulas Patocka vunmap(wc->memory_map - ((size_t)wc->start_sector << SECTOR_SHIFT)); 31748debafeSMikulas Patocka } 31848debafeSMikulas Patocka 31948debafeSMikulas Patocka static struct page *persistent_memory_page(void *addr) 32048debafeSMikulas Patocka { 32148debafeSMikulas Patocka if (is_vmalloc_addr(addr)) 32248debafeSMikulas Patocka return vmalloc_to_page(addr); 32348debafeSMikulas Patocka else 32448debafeSMikulas Patocka return virt_to_page(addr); 32548debafeSMikulas Patocka } 32648debafeSMikulas Patocka 32748debafeSMikulas Patocka static unsigned persistent_memory_page_offset(void *addr) 32848debafeSMikulas Patocka { 32948debafeSMikulas Patocka return (unsigned long)addr & (PAGE_SIZE - 1); 33048debafeSMikulas Patocka } 33148debafeSMikulas Patocka 33248debafeSMikulas Patocka static void persistent_memory_flush_cache(void *ptr, size_t size) 33348debafeSMikulas Patocka { 33448debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 33548debafeSMikulas Patocka flush_kernel_vmap_range(ptr, size); 33648debafeSMikulas Patocka } 33748debafeSMikulas Patocka 33848debafeSMikulas Patocka static void persistent_memory_invalidate_cache(void *ptr, size_t size) 33948debafeSMikulas Patocka { 34048debafeSMikulas Patocka if 
(is_vmalloc_addr(ptr)) 34148debafeSMikulas Patocka invalidate_kernel_vmap_range(ptr, size); 34248debafeSMikulas Patocka } 34348debafeSMikulas Patocka 34448debafeSMikulas Patocka static struct wc_memory_superblock *sb(struct dm_writecache *wc) 34548debafeSMikulas Patocka { 34648debafeSMikulas Patocka return wc->memory_map; 34748debafeSMikulas Patocka } 34848debafeSMikulas Patocka 34948debafeSMikulas Patocka static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e) 35048debafeSMikulas Patocka { 35148debafeSMikulas Patocka return &sb(wc)->entries[e->index]; 35248debafeSMikulas Patocka } 35348debafeSMikulas Patocka 35448debafeSMikulas Patocka static void *memory_data(struct dm_writecache *wc, struct wc_entry *e) 35548debafeSMikulas Patocka { 35648debafeSMikulas Patocka return (char *)wc->block_start + (e->index << wc->block_size_bits); 35748debafeSMikulas Patocka } 35848debafeSMikulas Patocka 35948debafeSMikulas Patocka static sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e) 36048debafeSMikulas Patocka { 361d284f824SMikulas Patocka return wc->start_sector + wc->metadata_sectors + 36248debafeSMikulas Patocka ((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT)); 36348debafeSMikulas Patocka } 36448debafeSMikulas Patocka 36548debafeSMikulas Patocka static uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e) 36648debafeSMikulas Patocka { 36748debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 36848debafeSMikulas Patocka return e->original_sector; 36948debafeSMikulas Patocka #else 37048debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->original_sector); 37148debafeSMikulas Patocka #endif 37248debafeSMikulas Patocka } 37348debafeSMikulas Patocka 37448debafeSMikulas Patocka static uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e) 37548debafeSMikulas Patocka { 37648debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 37748debafeSMikulas 
Patocka return e->seq_count; 37848debafeSMikulas Patocka #else 37948debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->seq_count); 38048debafeSMikulas Patocka #endif 38148debafeSMikulas Patocka } 38248debafeSMikulas Patocka 38348debafeSMikulas Patocka static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e) 38448debafeSMikulas Patocka { 38548debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 38648debafeSMikulas Patocka e->seq_count = -1; 38748debafeSMikulas Patocka #endif 38848debafeSMikulas Patocka pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1)); 38948debafeSMikulas Patocka } 39048debafeSMikulas Patocka 39148debafeSMikulas Patocka static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e, 39248debafeSMikulas Patocka uint64_t original_sector, uint64_t seq_count) 39348debafeSMikulas Patocka { 39448debafeSMikulas Patocka struct wc_memory_entry me; 39548debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 39648debafeSMikulas Patocka e->original_sector = original_sector; 39748debafeSMikulas Patocka e->seq_count = seq_count; 39848debafeSMikulas Patocka #endif 39948debafeSMikulas Patocka me.original_sector = cpu_to_le64(original_sector); 40048debafeSMikulas Patocka me.seq_count = cpu_to_le64(seq_count); 40148debafeSMikulas Patocka pmem_assign(*memory_entry(wc, e), me); 40248debafeSMikulas Patocka } 40348debafeSMikulas Patocka 40448debafeSMikulas Patocka #define writecache_error(wc, err, msg, arg...) 
\ 40548debafeSMikulas Patocka do { \ 40648debafeSMikulas Patocka if (!cmpxchg(&(wc)->error, 0, err)) \ 40748debafeSMikulas Patocka DMERR(msg, ##arg); \ 40848debafeSMikulas Patocka wake_up(&(wc)->freelist_wait); \ 40948debafeSMikulas Patocka } while (0) 41048debafeSMikulas Patocka 41148debafeSMikulas Patocka #define writecache_has_error(wc) (unlikely(READ_ONCE((wc)->error))) 41248debafeSMikulas Patocka 41348debafeSMikulas Patocka static void writecache_flush_all_metadata(struct dm_writecache *wc) 41448debafeSMikulas Patocka { 41548debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 41648debafeSMikulas Patocka memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size); 41748debafeSMikulas Patocka } 41848debafeSMikulas Patocka 41948debafeSMikulas Patocka static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size) 42048debafeSMikulas Patocka { 42148debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42248debafeSMikulas Patocka __set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY, 42348debafeSMikulas Patocka wc->dirty_bitmap); 42448debafeSMikulas Patocka } 42548debafeSMikulas Patocka 42648debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev); 42748debafeSMikulas Patocka 42848debafeSMikulas Patocka struct io_notify { 42948debafeSMikulas Patocka struct dm_writecache *wc; 43048debafeSMikulas Patocka struct completion c; 43148debafeSMikulas Patocka atomic_t count; 43248debafeSMikulas Patocka }; 43348debafeSMikulas Patocka 43448debafeSMikulas Patocka static void writecache_notify_io(unsigned long error, void *context) 43548debafeSMikulas Patocka { 43648debafeSMikulas Patocka struct io_notify *endio = context; 43748debafeSMikulas Patocka 43848debafeSMikulas Patocka if (unlikely(error != 0)) 43948debafeSMikulas Patocka writecache_error(endio->wc, -EIO, "error writing metadata"); 44048debafeSMikulas Patocka BUG_ON(atomic_read(&endio->count) <= 0); 44148debafeSMikulas Patocka if 
(atomic_dec_and_test(&endio->count)) 44248debafeSMikulas Patocka complete(&endio->c); 44348debafeSMikulas Patocka } 44448debafeSMikulas Patocka 445aa950920SMikulas Patocka static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) 446aa950920SMikulas Patocka { 447aa950920SMikulas Patocka wait_event(wc->bio_in_progress_wait[direction], 448aa950920SMikulas Patocka !atomic_read(&wc->bio_in_progress[direction])); 449aa950920SMikulas Patocka } 450aa950920SMikulas Patocka 451aa950920SMikulas Patocka static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 45248debafeSMikulas Patocka { 45348debafeSMikulas Patocka struct dm_io_region region; 45448debafeSMikulas Patocka struct dm_io_request req; 45548debafeSMikulas Patocka struct io_notify endio = { 45648debafeSMikulas Patocka wc, 45748debafeSMikulas Patocka COMPLETION_INITIALIZER_ONSTACK(endio.c), 45848debafeSMikulas Patocka ATOMIC_INIT(1), 45948debafeSMikulas Patocka }; 4601e1132eaSMikulas Patocka unsigned bitmap_bits = wc->dirty_bitmap_size * 8; 46148debafeSMikulas Patocka unsigned i = 0; 46248debafeSMikulas Patocka 46348debafeSMikulas Patocka while (1) { 46448debafeSMikulas Patocka unsigned j; 46548debafeSMikulas Patocka i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i); 46648debafeSMikulas Patocka if (unlikely(i == bitmap_bits)) 46748debafeSMikulas Patocka break; 46848debafeSMikulas Patocka j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i); 46948debafeSMikulas Patocka 47048debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 47148debafeSMikulas Patocka region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 47248debafeSMikulas Patocka region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 47348debafeSMikulas Patocka 47448debafeSMikulas Patocka if (unlikely(region.sector >= wc->metadata_sectors)) 47548debafeSMikulas Patocka break; 47648debafeSMikulas Patocka if (unlikely(region.sector + region.count > wc->metadata_sectors)) 
47748debafeSMikulas Patocka region.count = wc->metadata_sectors - region.sector; 47848debafeSMikulas Patocka 479d284f824SMikulas Patocka region.sector += wc->start_sector; 48048debafeSMikulas Patocka atomic_inc(&endio.count); 48148debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 48248debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 48348debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 48448debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; 48548debafeSMikulas Patocka req.client = wc->dm_io; 48648debafeSMikulas Patocka req.notify.fn = writecache_notify_io; 48748debafeSMikulas Patocka req.notify.context = &endio; 48848debafeSMikulas Patocka 48948debafeSMikulas Patocka /* writing via async dm-io (implied by notify.fn above) won't return an error */ 49048debafeSMikulas Patocka (void) dm_io(&req, 1, ®ion, NULL); 49148debafeSMikulas Patocka i = j; 49248debafeSMikulas Patocka } 49348debafeSMikulas Patocka 49448debafeSMikulas Patocka writecache_notify_io(0, &endio); 49548debafeSMikulas Patocka wait_for_completion_io(&endio.c); 49648debafeSMikulas Patocka 497aa950920SMikulas Patocka if (wait_for_ios) 498aa950920SMikulas Patocka writecache_wait_for_ios(wc, WRITE); 499aa950920SMikulas Patocka 50048debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 50148debafeSMikulas Patocka 50248debafeSMikulas Patocka memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); 50348debafeSMikulas Patocka } 50448debafeSMikulas Patocka 505aa950920SMikulas Patocka static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 50648debafeSMikulas Patocka { 50748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 50848debafeSMikulas Patocka wmb(); 50948debafeSMikulas Patocka else 510aa950920SMikulas Patocka ssd_commit_flushed(wc, wait_for_ios); 51148debafeSMikulas Patocka } 51248debafeSMikulas Patocka 51348debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) 51448debafeSMikulas 
Patocka { 51548debafeSMikulas Patocka int r; 51648debafeSMikulas Patocka struct dm_io_region region; 51748debafeSMikulas Patocka struct dm_io_request req; 51848debafeSMikulas Patocka 51948debafeSMikulas Patocka region.bdev = dev->bdev; 52048debafeSMikulas Patocka region.sector = 0; 52148debafeSMikulas Patocka region.count = 0; 52248debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 52348debafeSMikulas Patocka req.bi_op_flags = REQ_PREFLUSH; 52448debafeSMikulas Patocka req.mem.type = DM_IO_KMEM; 52548debafeSMikulas Patocka req.mem.ptr.addr = NULL; 52648debafeSMikulas Patocka req.client = wc->dm_io; 52748debafeSMikulas Patocka req.notify.fn = NULL; 52848debafeSMikulas Patocka 52948debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 53048debafeSMikulas Patocka if (unlikely(r)) 53148debafeSMikulas Patocka writecache_error(wc, r, "error flushing metadata: %d", r); 53248debafeSMikulas Patocka } 53348debafeSMikulas Patocka 53448debafeSMikulas Patocka #define WFE_RETURN_FOLLOWING 1 53548debafeSMikulas Patocka #define WFE_LOWEST_SEQ 2 53648debafeSMikulas Patocka 53748debafeSMikulas Patocka static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, 53848debafeSMikulas Patocka uint64_t block, int flags) 53948debafeSMikulas Patocka { 54048debafeSMikulas Patocka struct wc_entry *e; 54148debafeSMikulas Patocka struct rb_node *node = wc->tree.rb_node; 54248debafeSMikulas Patocka 54348debafeSMikulas Patocka if (unlikely(!node)) 54448debafeSMikulas Patocka return NULL; 54548debafeSMikulas Patocka 54648debafeSMikulas Patocka while (1) { 54748debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 54848debafeSMikulas Patocka if (read_original_sector(wc, e) == block) 54948debafeSMikulas Patocka break; 550f8011d33SMikulas Patocka 55148debafeSMikulas Patocka node = (read_original_sector(wc, e) >= block ? 
55248debafeSMikulas Patocka e->rb_node.rb_left : e->rb_node.rb_right); 55348debafeSMikulas Patocka if (unlikely(!node)) { 554f8011d33SMikulas Patocka if (!(flags & WFE_RETURN_FOLLOWING)) 55548debafeSMikulas Patocka return NULL; 55648debafeSMikulas Patocka if (read_original_sector(wc, e) >= block) { 557f8011d33SMikulas Patocka return e; 55848debafeSMikulas Patocka } else { 55948debafeSMikulas Patocka node = rb_next(&e->rb_node); 560f8011d33SMikulas Patocka if (unlikely(!node)) 56148debafeSMikulas Patocka return NULL; 56248debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 563f8011d33SMikulas Patocka return e; 56448debafeSMikulas Patocka } 56548debafeSMikulas Patocka } 56648debafeSMikulas Patocka } 56748debafeSMikulas Patocka 56848debafeSMikulas Patocka while (1) { 56948debafeSMikulas Patocka struct wc_entry *e2; 57048debafeSMikulas Patocka if (flags & WFE_LOWEST_SEQ) 57148debafeSMikulas Patocka node = rb_prev(&e->rb_node); 57248debafeSMikulas Patocka else 57348debafeSMikulas Patocka node = rb_next(&e->rb_node); 57484420b1eSHuaisheng Ye if (unlikely(!node)) 57548debafeSMikulas Patocka return e; 57648debafeSMikulas Patocka e2 = container_of(node, struct wc_entry, rb_node); 57748debafeSMikulas Patocka if (read_original_sector(wc, e2) != block) 57848debafeSMikulas Patocka return e; 57948debafeSMikulas Patocka e = e2; 58048debafeSMikulas Patocka } 58148debafeSMikulas Patocka } 58248debafeSMikulas Patocka 58348debafeSMikulas Patocka static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins) 58448debafeSMikulas Patocka { 58548debafeSMikulas Patocka struct wc_entry *e; 58648debafeSMikulas Patocka struct rb_node **node = &wc->tree.rb_node, *parent = NULL; 58748debafeSMikulas Patocka 58848debafeSMikulas Patocka while (*node) { 58948debafeSMikulas Patocka e = container_of(*node, struct wc_entry, rb_node); 59048debafeSMikulas Patocka parent = &e->rb_node; 59148debafeSMikulas Patocka if (read_original_sector(wc, e) > 
read_original_sector(wc, ins)) 59248debafeSMikulas Patocka node = &parent->rb_left; 59348debafeSMikulas Patocka else 59448debafeSMikulas Patocka node = &parent->rb_right; 59548debafeSMikulas Patocka } 59648debafeSMikulas Patocka rb_link_node(&ins->rb_node, parent, node); 59748debafeSMikulas Patocka rb_insert_color(&ins->rb_node, &wc->tree); 59848debafeSMikulas Patocka list_add(&ins->lru, &wc->lru); 59948debafeSMikulas Patocka } 60048debafeSMikulas Patocka 60148debafeSMikulas Patocka static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e) 60248debafeSMikulas Patocka { 60348debafeSMikulas Patocka list_del(&e->lru); 60448debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->tree); 60548debafeSMikulas Patocka } 60648debafeSMikulas Patocka 60748debafeSMikulas Patocka static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e) 60848debafeSMikulas Patocka { 60948debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 61048debafeSMikulas Patocka struct rb_node **node = &wc->freetree.rb_node, *parent = NULL; 61148debafeSMikulas Patocka if (unlikely(!*node)) 61248debafeSMikulas Patocka wc->current_free = e; 61348debafeSMikulas Patocka while (*node) { 61448debafeSMikulas Patocka parent = *node; 61548debafeSMikulas Patocka if (&e->rb_node < *node) 61648debafeSMikulas Patocka node = &parent->rb_left; 61748debafeSMikulas Patocka else 61848debafeSMikulas Patocka node = &parent->rb_right; 61948debafeSMikulas Patocka } 62048debafeSMikulas Patocka rb_link_node(&e->rb_node, parent, node); 62148debafeSMikulas Patocka rb_insert_color(&e->rb_node, &wc->freetree); 62248debafeSMikulas Patocka } else { 62348debafeSMikulas Patocka list_add_tail(&e->lru, &wc->freelist); 62448debafeSMikulas Patocka } 62548debafeSMikulas Patocka wc->freelist_size++; 62648debafeSMikulas Patocka } 62748debafeSMikulas Patocka 628dcd19507SMikulas Patocka static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) 
62948debafeSMikulas Patocka { 63048debafeSMikulas Patocka struct wc_entry *e; 63148debafeSMikulas Patocka 63248debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 63348debafeSMikulas Patocka struct rb_node *next; 63448debafeSMikulas Patocka if (unlikely(!wc->current_free)) 63548debafeSMikulas Patocka return NULL; 63648debafeSMikulas Patocka e = wc->current_free; 637dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 638dcd19507SMikulas Patocka return NULL; 63948debafeSMikulas Patocka next = rb_next(&e->rb_node); 64048debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->freetree); 64148debafeSMikulas Patocka if (unlikely(!next)) 64248debafeSMikulas Patocka next = rb_first(&wc->freetree); 64348debafeSMikulas Patocka wc->current_free = next ? container_of(next, struct wc_entry, rb_node) : NULL; 64448debafeSMikulas Patocka } else { 64548debafeSMikulas Patocka if (unlikely(list_empty(&wc->freelist))) 64648debafeSMikulas Patocka return NULL; 64748debafeSMikulas Patocka e = container_of(wc->freelist.next, struct wc_entry, lru); 648dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 649dcd19507SMikulas Patocka return NULL; 65048debafeSMikulas Patocka list_del(&e->lru); 65148debafeSMikulas Patocka } 65248debafeSMikulas Patocka wc->freelist_size--; 65348debafeSMikulas Patocka if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) 65448debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 65548debafeSMikulas Patocka 65648debafeSMikulas Patocka return e; 65748debafeSMikulas Patocka } 65848debafeSMikulas Patocka 65948debafeSMikulas Patocka static void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e) 66048debafeSMikulas Patocka { 66148debafeSMikulas Patocka writecache_unlink(wc, e); 66248debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 66348debafeSMikulas Patocka 
clear_seq_count(wc, e); 66448debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 66548debafeSMikulas Patocka if (unlikely(waitqueue_active(&wc->freelist_wait))) 66648debafeSMikulas Patocka wake_up(&wc->freelist_wait); 66748debafeSMikulas Patocka } 66848debafeSMikulas Patocka 66948debafeSMikulas Patocka static void writecache_wait_on_freelist(struct dm_writecache *wc) 67048debafeSMikulas Patocka { 67148debafeSMikulas Patocka DEFINE_WAIT(wait); 67248debafeSMikulas Patocka 67348debafeSMikulas Patocka prepare_to_wait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE); 67448debafeSMikulas Patocka wc_unlock(wc); 67548debafeSMikulas Patocka io_schedule(); 67648debafeSMikulas Patocka finish_wait(&wc->freelist_wait, &wait); 67748debafeSMikulas Patocka wc_lock(wc); 67848debafeSMikulas Patocka } 67948debafeSMikulas Patocka 68048debafeSMikulas Patocka static void writecache_poison_lists(struct dm_writecache *wc) 68148debafeSMikulas Patocka { 68248debafeSMikulas Patocka /* 68348debafeSMikulas Patocka * Catch incorrect access to these values while the device is suspended. 
68448debafeSMikulas Patocka */ 68548debafeSMikulas Patocka memset(&wc->tree, -1, sizeof wc->tree); 68648debafeSMikulas Patocka wc->lru.next = LIST_POISON1; 68748debafeSMikulas Patocka wc->lru.prev = LIST_POISON2; 68848debafeSMikulas Patocka wc->freelist.next = LIST_POISON1; 68948debafeSMikulas Patocka wc->freelist.prev = LIST_POISON2; 69048debafeSMikulas Patocka } 69148debafeSMikulas Patocka 69248debafeSMikulas Patocka static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e) 69348debafeSMikulas Patocka { 69448debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 69548debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 69648debafeSMikulas Patocka writecache_flush_region(wc, memory_data(wc, e), wc->block_size); 69748debafeSMikulas Patocka } 69848debafeSMikulas Patocka 69948debafeSMikulas Patocka static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e) 70048debafeSMikulas Patocka { 70148debafeSMikulas Patocka return read_seq_count(wc, e) < wc->seq_count; 70248debafeSMikulas Patocka } 70348debafeSMikulas Patocka 70448debafeSMikulas Patocka static void writecache_flush(struct dm_writecache *wc) 70548debafeSMikulas Patocka { 70648debafeSMikulas Patocka struct wc_entry *e, *e2; 70748debafeSMikulas Patocka bool need_flush_after_free; 70848debafeSMikulas Patocka 70948debafeSMikulas Patocka wc->uncommitted_blocks = 0; 71048debafeSMikulas Patocka del_timer(&wc->autocommit_timer); 71148debafeSMikulas Patocka 71248debafeSMikulas Patocka if (list_empty(&wc->lru)) 71348debafeSMikulas Patocka return; 71448debafeSMikulas Patocka 71548debafeSMikulas Patocka e = container_of(wc->lru.next, struct wc_entry, lru); 71648debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e)) { 71748debafeSMikulas Patocka if (wc->overwrote_committed) { 71848debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 71948debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 
72048debafeSMikulas Patocka wc->overwrote_committed = false; 72148debafeSMikulas Patocka } 72248debafeSMikulas Patocka return; 72348debafeSMikulas Patocka } 72448debafeSMikulas Patocka while (1) { 72548debafeSMikulas Patocka writecache_flush_entry(wc, e); 72648debafeSMikulas Patocka if (unlikely(e->lru.next == &wc->lru)) 72748debafeSMikulas Patocka break; 72848debafeSMikulas Patocka e2 = container_of(e->lru.next, struct wc_entry, lru); 72948debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e2)) 73048debafeSMikulas Patocka break; 73148debafeSMikulas Patocka e = e2; 73248debafeSMikulas Patocka cond_resched(); 73348debafeSMikulas Patocka } 734aa950920SMikulas Patocka writecache_commit_flushed(wc, true); 73548debafeSMikulas Patocka 73648debafeSMikulas Patocka wc->seq_count++; 73748debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); 73848debafeSMikulas Patocka writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); 739aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 74048debafeSMikulas Patocka 74148debafeSMikulas Patocka wc->overwrote_committed = false; 74248debafeSMikulas Patocka 74348debafeSMikulas Patocka need_flush_after_free = false; 74448debafeSMikulas Patocka while (1) { 74548debafeSMikulas Patocka /* Free another committed entry with lower seq-count */ 74648debafeSMikulas Patocka struct rb_node *rb_node = rb_prev(&e->rb_node); 74748debafeSMikulas Patocka 74848debafeSMikulas Patocka if (rb_node) { 74948debafeSMikulas Patocka e2 = container_of(rb_node, struct wc_entry, rb_node); 75048debafeSMikulas Patocka if (read_original_sector(wc, e2) == read_original_sector(wc, e) && 75148debafeSMikulas Patocka likely(!e2->write_in_progress)) { 75248debafeSMikulas Patocka writecache_free_entry(wc, e2); 75348debafeSMikulas Patocka need_flush_after_free = true; 75448debafeSMikulas Patocka } 75548debafeSMikulas Patocka } 75648debafeSMikulas Patocka if (unlikely(e->lru.prev == &wc->lru)) 
75748debafeSMikulas Patocka break; 75848debafeSMikulas Patocka e = container_of(e->lru.prev, struct wc_entry, lru); 75948debafeSMikulas Patocka cond_resched(); 76048debafeSMikulas Patocka } 76148debafeSMikulas Patocka 76248debafeSMikulas Patocka if (need_flush_after_free) 763aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 76448debafeSMikulas Patocka } 76548debafeSMikulas Patocka 76648debafeSMikulas Patocka static void writecache_flush_work(struct work_struct *work) 76748debafeSMikulas Patocka { 76848debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work); 76948debafeSMikulas Patocka 77048debafeSMikulas Patocka wc_lock(wc); 77148debafeSMikulas Patocka writecache_flush(wc); 77248debafeSMikulas Patocka wc_unlock(wc); 77348debafeSMikulas Patocka } 77448debafeSMikulas Patocka 77548debafeSMikulas Patocka static void writecache_autocommit_timer(struct timer_list *t) 77648debafeSMikulas Patocka { 77748debafeSMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, autocommit_timer); 77848debafeSMikulas Patocka if (!writecache_has_error(wc)) 77948debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 78048debafeSMikulas Patocka } 78148debafeSMikulas Patocka 78248debafeSMikulas Patocka static void writecache_schedule_autocommit(struct dm_writecache *wc) 78348debafeSMikulas Patocka { 78448debafeSMikulas Patocka if (!timer_pending(&wc->autocommit_timer)) 78548debafeSMikulas Patocka mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies); 78648debafeSMikulas Patocka } 78748debafeSMikulas Patocka 78848debafeSMikulas Patocka static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end) 78948debafeSMikulas Patocka { 79048debafeSMikulas Patocka struct wc_entry *e; 79148debafeSMikulas Patocka bool discarded_something = false; 79248debafeSMikulas Patocka 79348debafeSMikulas Patocka e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ); 
79448debafeSMikulas Patocka if (unlikely(!e)) 79548debafeSMikulas Patocka return; 79648debafeSMikulas Patocka 79748debafeSMikulas Patocka while (read_original_sector(wc, e) < end) { 79848debafeSMikulas Patocka struct rb_node *node = rb_next(&e->rb_node); 79948debafeSMikulas Patocka 80048debafeSMikulas Patocka if (likely(!e->write_in_progress)) { 80148debafeSMikulas Patocka if (!discarded_something) { 80248debafeSMikulas Patocka writecache_wait_for_ios(wc, READ); 80348debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 80448debafeSMikulas Patocka discarded_something = true; 80548debafeSMikulas Patocka } 80648debafeSMikulas Patocka writecache_free_entry(wc, e); 80748debafeSMikulas Patocka } 80848debafeSMikulas Patocka 80984420b1eSHuaisheng Ye if (unlikely(!node)) 81048debafeSMikulas Patocka break; 81148debafeSMikulas Patocka 81248debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 81348debafeSMikulas Patocka } 81448debafeSMikulas Patocka 81548debafeSMikulas Patocka if (discarded_something) 816aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 81748debafeSMikulas Patocka } 81848debafeSMikulas Patocka 81948debafeSMikulas Patocka static bool writecache_wait_for_writeback(struct dm_writecache *wc) 82048debafeSMikulas Patocka { 82148debafeSMikulas Patocka if (wc->writeback_size) { 82248debafeSMikulas Patocka writecache_wait_on_freelist(wc); 82348debafeSMikulas Patocka return true; 82448debafeSMikulas Patocka } 82548debafeSMikulas Patocka return false; 82648debafeSMikulas Patocka } 82748debafeSMikulas Patocka 82848debafeSMikulas Patocka static void writecache_suspend(struct dm_target *ti) 82948debafeSMikulas Patocka { 83048debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 83148debafeSMikulas Patocka bool flush_on_suspend; 83248debafeSMikulas Patocka 83348debafeSMikulas Patocka del_timer_sync(&wc->autocommit_timer); 83448debafeSMikulas Patocka 83548debafeSMikulas Patocka wc_lock(wc); 83648debafeSMikulas Patocka 
writecache_flush(wc); 83748debafeSMikulas Patocka flush_on_suspend = wc->flush_on_suspend; 83848debafeSMikulas Patocka if (flush_on_suspend) { 83948debafeSMikulas Patocka wc->flush_on_suspend = false; 84048debafeSMikulas Patocka wc->writeback_all++; 84148debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 84248debafeSMikulas Patocka } 84348debafeSMikulas Patocka wc_unlock(wc); 84448debafeSMikulas Patocka 845*adc0daadSMikulas Patocka drain_workqueue(wc->writeback_wq); 84648debafeSMikulas Patocka 84748debafeSMikulas Patocka wc_lock(wc); 84848debafeSMikulas Patocka if (flush_on_suspend) 84948debafeSMikulas Patocka wc->writeback_all--; 85048debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 85148debafeSMikulas Patocka 85248debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 85348debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 85448debafeSMikulas Patocka 85548debafeSMikulas Patocka writecache_poison_lists(wc); 85648debafeSMikulas Patocka 85748debafeSMikulas Patocka wc_unlock(wc); 85848debafeSMikulas Patocka } 85948debafeSMikulas Patocka 86048debafeSMikulas Patocka static int writecache_alloc_entries(struct dm_writecache *wc) 86148debafeSMikulas Patocka { 86248debafeSMikulas Patocka size_t b; 86348debafeSMikulas Patocka 86448debafeSMikulas Patocka if (wc->entries) 86548debafeSMikulas Patocka return 0; 86650a7d3baSKees Cook wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); 86748debafeSMikulas Patocka if (!wc->entries) 86848debafeSMikulas Patocka return -ENOMEM; 86948debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 87048debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 87148debafeSMikulas Patocka e->index = b; 87248debafeSMikulas Patocka e->write_in_progress = false; 87348debafeSMikulas Patocka } 87448debafeSMikulas Patocka 87548debafeSMikulas Patocka return 0; 87648debafeSMikulas Patocka } 87748debafeSMikulas Patocka 87848debafeSMikulas Patocka static 
void writecache_resume(struct dm_target *ti) 87948debafeSMikulas Patocka { 88048debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 88148debafeSMikulas Patocka size_t b; 88248debafeSMikulas Patocka bool need_flush = false; 88348debafeSMikulas Patocka __le64 sb_seq_count; 88448debafeSMikulas Patocka int r; 88548debafeSMikulas Patocka 88648debafeSMikulas Patocka wc_lock(wc); 88748debafeSMikulas Patocka 88848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 88948debafeSMikulas Patocka persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); 89048debafeSMikulas Patocka 89148debafeSMikulas Patocka wc->tree = RB_ROOT; 89248debafeSMikulas Patocka INIT_LIST_HEAD(&wc->lru); 89348debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 89448debafeSMikulas Patocka wc->freetree = RB_ROOT; 89548debafeSMikulas Patocka wc->current_free = NULL; 89648debafeSMikulas Patocka } else { 89748debafeSMikulas Patocka INIT_LIST_HEAD(&wc->freelist); 89848debafeSMikulas Patocka } 89948debafeSMikulas Patocka wc->freelist_size = 0; 90048debafeSMikulas Patocka 90148debafeSMikulas Patocka r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); 90248debafeSMikulas Patocka if (r) { 90348debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); 90448debafeSMikulas Patocka sb_seq_count = cpu_to_le64(0); 90548debafeSMikulas Patocka } 90648debafeSMikulas Patocka wc->seq_count = le64_to_cpu(sb_seq_count); 90748debafeSMikulas Patocka 90848debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 90948debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 91048debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 91148debafeSMikulas Patocka struct wc_memory_entry wme; 91248debafeSMikulas Patocka if (writecache_has_error(wc)) { 91348debafeSMikulas Patocka e->original_sector = -1; 91448debafeSMikulas Patocka e->seq_count = -1; 91548debafeSMikulas Patocka continue; 91648debafeSMikulas Patocka } 
91748debafeSMikulas Patocka r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 91848debafeSMikulas Patocka if (r) { 91948debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", 92048debafeSMikulas Patocka (unsigned long)b, r); 92148debafeSMikulas Patocka e->original_sector = -1; 92248debafeSMikulas Patocka e->seq_count = -1; 92348debafeSMikulas Patocka } else { 92448debafeSMikulas Patocka e->original_sector = le64_to_cpu(wme.original_sector); 92548debafeSMikulas Patocka e->seq_count = le64_to_cpu(wme.seq_count); 92648debafeSMikulas Patocka } 92748debafeSMikulas Patocka } 92848debafeSMikulas Patocka #endif 92948debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 93048debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 93148debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) { 93248debafeSMikulas Patocka if (read_seq_count(wc, e) != -1) { 93348debafeSMikulas Patocka erase_this: 93448debafeSMikulas Patocka clear_seq_count(wc, e); 93548debafeSMikulas Patocka need_flush = true; 93648debafeSMikulas Patocka } 93748debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 93848debafeSMikulas Patocka } else { 93948debafeSMikulas Patocka struct wc_entry *old; 94048debafeSMikulas Patocka 94148debafeSMikulas Patocka old = writecache_find_entry(wc, read_original_sector(wc, e), 0); 94248debafeSMikulas Patocka if (!old) { 94348debafeSMikulas Patocka writecache_insert_entry(wc, e); 94448debafeSMikulas Patocka } else { 94548debafeSMikulas Patocka if (read_seq_count(wc, old) == read_seq_count(wc, e)) { 94648debafeSMikulas Patocka writecache_error(wc, -EINVAL, 94748debafeSMikulas Patocka "two identical entries, position %llu, sector %llu, sequence %llu", 94848debafeSMikulas Patocka (unsigned long long)b, (unsigned long long)read_original_sector(wc, e), 94948debafeSMikulas Patocka (unsigned long long)read_seq_count(wc, e)); 95048debafeSMikulas Patocka } 95148debafeSMikulas 
Patocka if (read_seq_count(wc, old) > read_seq_count(wc, e)) { 95248debafeSMikulas Patocka goto erase_this; 95348debafeSMikulas Patocka } else { 95448debafeSMikulas Patocka writecache_free_entry(wc, old); 95548debafeSMikulas Patocka writecache_insert_entry(wc, e); 95648debafeSMikulas Patocka need_flush = true; 95748debafeSMikulas Patocka } 95848debafeSMikulas Patocka } 95948debafeSMikulas Patocka } 96048debafeSMikulas Patocka cond_resched(); 96148debafeSMikulas Patocka } 96248debafeSMikulas Patocka 96348debafeSMikulas Patocka if (need_flush) { 96448debafeSMikulas Patocka writecache_flush_all_metadata(wc); 965aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 96648debafeSMikulas Patocka } 96748debafeSMikulas Patocka 96848debafeSMikulas Patocka wc_unlock(wc); 96948debafeSMikulas Patocka } 97048debafeSMikulas Patocka 97148debafeSMikulas Patocka static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 97248debafeSMikulas Patocka { 97348debafeSMikulas Patocka if (argc != 1) 97448debafeSMikulas Patocka return -EINVAL; 97548debafeSMikulas Patocka 97648debafeSMikulas Patocka wc_lock(wc); 97748debafeSMikulas Patocka if (dm_suspended(wc->ti)) { 97848debafeSMikulas Patocka wc_unlock(wc); 97948debafeSMikulas Patocka return -EBUSY; 98048debafeSMikulas Patocka } 98148debafeSMikulas Patocka if (writecache_has_error(wc)) { 98248debafeSMikulas Patocka wc_unlock(wc); 98348debafeSMikulas Patocka return -EIO; 98448debafeSMikulas Patocka } 98548debafeSMikulas Patocka 98648debafeSMikulas Patocka writecache_flush(wc); 98748debafeSMikulas Patocka wc->writeback_all++; 98848debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 98948debafeSMikulas Patocka wc_unlock(wc); 99048debafeSMikulas Patocka 99148debafeSMikulas Patocka flush_workqueue(wc->writeback_wq); 99248debafeSMikulas Patocka 99348debafeSMikulas Patocka wc_lock(wc); 99448debafeSMikulas Patocka wc->writeback_all--; 99548debafeSMikulas Patocka if (writecache_has_error(wc)) { 
99648debafeSMikulas Patocka wc_unlock(wc); 99748debafeSMikulas Patocka return -EIO; 99848debafeSMikulas Patocka } 99948debafeSMikulas Patocka wc_unlock(wc); 100048debafeSMikulas Patocka 100148debafeSMikulas Patocka return 0; 100248debafeSMikulas Patocka } 100348debafeSMikulas Patocka 100448debafeSMikulas Patocka static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 100548debafeSMikulas Patocka { 100648debafeSMikulas Patocka if (argc != 1) 100748debafeSMikulas Patocka return -EINVAL; 100848debafeSMikulas Patocka 100948debafeSMikulas Patocka wc_lock(wc); 101048debafeSMikulas Patocka wc->flush_on_suspend = true; 101148debafeSMikulas Patocka wc_unlock(wc); 101248debafeSMikulas Patocka 101348debafeSMikulas Patocka return 0; 101448debafeSMikulas Patocka } 101548debafeSMikulas Patocka 101648debafeSMikulas Patocka static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, 101748debafeSMikulas Patocka char *result, unsigned maxlen) 101848debafeSMikulas Patocka { 101948debafeSMikulas Patocka int r = -EINVAL; 102048debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 102148debafeSMikulas Patocka 102248debafeSMikulas Patocka if (!strcasecmp(argv[0], "flush")) 102348debafeSMikulas Patocka r = process_flush_mesg(argc, argv, wc); 102448debafeSMikulas Patocka else if (!strcasecmp(argv[0], "flush_on_suspend")) 102548debafeSMikulas Patocka r = process_flush_on_suspend_mesg(argc, argv, wc); 102648debafeSMikulas Patocka else 102748debafeSMikulas Patocka DMERR("unrecognised message received: %s", argv[0]); 102848debafeSMikulas Patocka 102948debafeSMikulas Patocka return r; 103048debafeSMikulas Patocka } 103148debafeSMikulas Patocka 103248debafeSMikulas Patocka static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) 103348debafeSMikulas Patocka { 103448debafeSMikulas Patocka void *buf; 103548debafeSMikulas Patocka unsigned long flags; 103648debafeSMikulas Patocka unsigned size; 
103748debafeSMikulas Patocka int rw = bio_data_dir(bio); 103848debafeSMikulas Patocka unsigned remaining_size = wc->block_size; 103948debafeSMikulas Patocka 104048debafeSMikulas Patocka do { 104148debafeSMikulas Patocka struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); 104248debafeSMikulas Patocka buf = bvec_kmap_irq(&bv, &flags); 104348debafeSMikulas Patocka size = bv.bv_len; 104448debafeSMikulas Patocka if (unlikely(size > remaining_size)) 104548debafeSMikulas Patocka size = remaining_size; 104648debafeSMikulas Patocka 104748debafeSMikulas Patocka if (rw == READ) { 104848debafeSMikulas Patocka int r; 104948debafeSMikulas Patocka r = memcpy_mcsafe(buf, data, size); 105048debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 105148debafeSMikulas Patocka if (unlikely(r)) { 105248debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading data: %d", r); 105348debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 105448debafeSMikulas Patocka } 105548debafeSMikulas Patocka } else { 105648debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 105748debafeSMikulas Patocka memcpy_flushcache(data, buf, size); 105848debafeSMikulas Patocka } 105948debafeSMikulas Patocka 106048debafeSMikulas Patocka bvec_kunmap_irq(buf, &flags); 106148debafeSMikulas Patocka 106248debafeSMikulas Patocka data = (char *)data + size; 106348debafeSMikulas Patocka remaining_size -= size; 106448debafeSMikulas Patocka bio_advance(bio, size); 106548debafeSMikulas Patocka } while (unlikely(remaining_size)); 106648debafeSMikulas Patocka } 106748debafeSMikulas Patocka 106848debafeSMikulas Patocka static int writecache_flush_thread(void *data) 106948debafeSMikulas Patocka { 107048debafeSMikulas Patocka struct dm_writecache *wc = data; 107148debafeSMikulas Patocka 107248debafeSMikulas Patocka while (1) { 107348debafeSMikulas Patocka struct bio *bio; 107448debafeSMikulas Patocka 107548debafeSMikulas Patocka wc_lock(wc); 107648debafeSMikulas Patocka bio = 
/*
 * Map callback of the target: decide, under the writecache lock, what to do
 * with an incoming bio.
 *
 * Possible outcomes (see the labels at the bottom):
 *  - unlock_remap_origin: send the bio to the origin device (wc->dev);
 *  - unlock_remap:        send the bio to the cache device; the bio is counted
 *                         in wc->bio_in_progress[] and tagged via bi_private so
 *                         that writecache_end_io() undoes the count;
 *  - unlock_submit:       the bio was fully served here; complete it;
 *  - unlock_return:       the bio was handed to the flush thread, which now
 *                         owns it;
 *  - unlock_error:        fail the bio with an I/O error.
 *
 * Return: DM_MAPIO_REMAPPED for the two remap outcomes, DM_MAPIO_SUBMITTED
 * for the others.
 */
static int writecache_map(struct dm_target *ti, struct bio *bio)
{
	struct wc_entry *e;
	struct dm_writecache *wc = ti->private;

	/* NULL means "not counted in bio_in_progress"; see unlock_remap. */
	bio->bi_private = NULL;

	wc_lock(wc);

	/*
	 * Flush request: PMEM mode commits right here; otherwise the bio is
	 * queued for the flush thread.
	 */
	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
		if (writecache_has_error(wc))
			goto unlock_error;
		if (WC_MODE_PMEM(wc)) {
			writecache_flush(wc);
			if (writecache_has_error(wc))
				goto unlock_error;
			goto unlock_submit;
		} else {
			writecache_offload_bio(wc, bio);
			goto unlock_return;
		}
	}

	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);

	/*
	 * Both the start sector and the sector count must be multiples of the
	 * cache block size (expressed in 512-byte sectors); OR-ing them lets a
	 * single mask test check both.
	 */
	if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
				(wc->block_size / 512 - 1)) != 0)) {
		DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
		      (unsigned long long)bio->bi_iter.bi_sector,
		      bio->bi_iter.bi_size, wc->block_size);
		goto unlock_error;
	}

	/*
	 * Discard: PMEM mode drops the cached blocks here and forwards the
	 * discard to the origin; otherwise the flush thread does both.
	 */
	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
		if (writecache_has_error(wc))
			goto unlock_error;
		if (WC_MODE_PMEM(wc)) {
			writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
			goto unlock_remap_origin;
		} else {
			writecache_offload_bio(wc, bio);
			goto unlock_return;
		}
	}

	if (bio_data_dir(bio) == READ) {
read_next_block:
		/* Entry at the bio's current sector, or else the next one after it. */
		e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
		if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
			/* Cache hit on the first block of the bio. */
			if (WC_MODE_PMEM(wc)) {
				/*
				 * Copy one block out of persistent memory (this
				 * advances the bio) and repeat while data remains.
				 */
				bio_copy_block(wc, bio, memory_data(wc, e));
				if (bio->bi_iter.bi_size)
					goto read_next_block;
				goto unlock_submit;
			} else {
				/* Serve exactly one block from the cache device. */
				dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
				bio_set_dev(bio, wc->ssd_dev->bdev);
				bio->bi_iter.bi_sector = cache_sector(wc, e);
				/*
				 * Entry not committed yet: wait for in-flight
				 * writes before reading it back.
				 */
				if (!writecache_entry_is_committed(wc, e))
					writecache_wait_for_ios(wc, WRITE);
				goto unlock_remap;
			}
		} else {
			/*
			 * Miss on the first block.  If a later part of the bio
			 * is cached, shorten the bio so that it ends where the
			 * cached block starts, then read from the origin.
			 */
			if (e) {
				sector_t next_boundary =
					read_original_sector(wc, e) - bio->bi_iter.bi_sector;
				if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
					dm_accept_partial_bio(bio, next_boundary);
				}
			}
			goto unlock_remap_origin;
		}
	} else {
		/* Write path: one iteration per cache block (PMEM mode). */
		do {
			if (writecache_has_error(wc))
				goto unlock_error;
			e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
			if (e) {
				/* An uncommitted entry can be overwritten in place. */
				if (!writecache_entry_is_committed(wc, e))
					goto bio_copy;
				/*
				 * Non-PMEM mode: a committed entry that is not
				 * being written back is overwritten in place too;
				 * remember that this happened.
				 */
				if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
					wc->overwrote_committed = true;
					goto bio_copy;
				}
			}
			/* Need a fresh entry; any free block will do. */
			e = writecache_pop_from_freelist(wc, (sector_t)-1);
			if (unlikely(!e)) {
				/*
				 * Nothing free: sleep, then retry.  "continue"
				 * re-tests the loop condition, which still holds
				 * because the bio has not been advanced.
				 */
				writecache_wait_on_freelist(wc);
				continue;
			}
			write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
			writecache_insert_entry(wc, e);
			wc->uncommitted_blocks++;
bio_copy:
			if (WC_MODE_PMEM(wc)) {
				bio_copy_block(wc, bio, memory_data(wc, e));
			} else {
				/*
				 * Non-PMEM mode: the bio itself is redirected to
				 * the cache device.  Try to extend the write over
				 * further blocks, as long as the next free entry
				 * is physically contiguous on the cache device.
				 */
				unsigned bio_size = wc->block_size;
				sector_t start_cache_sec = cache_sector(wc, e);
				sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);

				while (bio_size < bio->bi_iter.bi_size) {
					struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
					if (!f)
						break;
					write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
									(bio_size >> SECTOR_SHIFT), wc->seq_count);
					writecache_insert_entry(wc, f);
					wc->uncommitted_blocks++;
					bio_size += wc->block_size;
					current_cache_sec += wc->block_size >> SECTOR_SHIFT;
				}

				bio_set_dev(bio, wc->ssd_dev->bdev);
				bio->bi_iter.bi_sector = start_cache_sec;
				/* Accept only the part that got cache blocks assigned. */
				dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);

				/* Commit from the workqueue when enough has piled up. */
				if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
					wc->uncommitted_blocks = 0;
					queue_work(wc->writeback_wq, &wc->flush_work);
				} else {
					writecache_schedule_autocommit(wc);
				}
				goto unlock_remap;
			}
		} while (bio->bi_iter.bi_size);

		/*
		 * Only PMEM mode gets here: the whole bio has been copied.
		 * FUA, or too many uncommitted blocks, forces a commit now.
		 */
		if (unlikely(bio->bi_opf & REQ_FUA ||
			     wc->uncommitted_blocks >= wc->autocommit_blocks))
			writecache_flush(wc);
		else
			writecache_schedule_autocommit(wc);
		goto unlock_submit;
	}

unlock_remap_origin:
	bio_set_dev(bio, wc->dev->bdev);
	wc_unlock(wc);
	return DM_MAPIO_REMAPPED;

unlock_remap:
	/* make sure that writecache_end_io decrements bio_in_progress: */
	bio->bi_private = (void *)1;
	atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
	wc_unlock(wc);
	return DM_MAPIO_REMAPPED;

unlock_submit:
	wc_unlock(wc);
	bio_endio(bio);
	return DM_MAPIO_SUBMITTED;

unlock_return:
	/* The bio now belongs to the flush thread; do not complete it here. */
	wc_unlock(wc);
	return DM_MAPIO_SUBMITTED;

unlock_error:
	wc_unlock(wc);
	bio_io_error(bio);
	return DM_MAPIO_SUBMITTED;
}
= ti->private; 130148debafeSMikulas Patocka 130248debafeSMikulas Patocka if (limits->logical_block_size < wc->block_size) 130348debafeSMikulas Patocka limits->logical_block_size = wc->block_size; 130448debafeSMikulas Patocka 130548debafeSMikulas Patocka if (limits->physical_block_size < wc->block_size) 130648debafeSMikulas Patocka limits->physical_block_size = wc->block_size; 130748debafeSMikulas Patocka 130848debafeSMikulas Patocka if (limits->io_min < wc->block_size) 130948debafeSMikulas Patocka limits->io_min = wc->block_size; 131048debafeSMikulas Patocka } 131148debafeSMikulas Patocka 131248debafeSMikulas Patocka 131348debafeSMikulas Patocka static void writecache_writeback_endio(struct bio *bio) 131448debafeSMikulas Patocka { 131548debafeSMikulas Patocka struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio); 131648debafeSMikulas Patocka struct dm_writecache *wc = wb->wc; 131748debafeSMikulas Patocka unsigned long flags; 131848debafeSMikulas Patocka 131948debafeSMikulas Patocka raw_spin_lock_irqsave(&wc->endio_list_lock, flags); 132048debafeSMikulas Patocka if (unlikely(list_empty(&wc->endio_list))) 132148debafeSMikulas Patocka wake_up_process(wc->endio_thread); 132248debafeSMikulas Patocka list_add_tail(&wb->endio_entry, &wc->endio_list); 132348debafeSMikulas Patocka raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags); 132448debafeSMikulas Patocka } 132548debafeSMikulas Patocka 132648debafeSMikulas Patocka static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr) 132748debafeSMikulas Patocka { 132848debafeSMikulas Patocka struct copy_struct *c = ptr; 132948debafeSMikulas Patocka struct dm_writecache *wc = c->wc; 133048debafeSMikulas Patocka 133148debafeSMikulas Patocka c->error = likely(!(read_err | write_err)) ? 
0 : -EIO; 133248debafeSMikulas Patocka 133348debafeSMikulas Patocka raw_spin_lock_irq(&wc->endio_list_lock); 133448debafeSMikulas Patocka if (unlikely(list_empty(&wc->endio_list))) 133548debafeSMikulas Patocka wake_up_process(wc->endio_thread); 133648debafeSMikulas Patocka list_add_tail(&c->endio_entry, &wc->endio_list); 133748debafeSMikulas Patocka raw_spin_unlock_irq(&wc->endio_list_lock); 133848debafeSMikulas Patocka } 133948debafeSMikulas Patocka 134048debafeSMikulas Patocka static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list) 134148debafeSMikulas Patocka { 134248debafeSMikulas Patocka unsigned i; 134348debafeSMikulas Patocka struct writeback_struct *wb; 134448debafeSMikulas Patocka struct wc_entry *e; 134548debafeSMikulas Patocka unsigned long n_walked = 0; 134648debafeSMikulas Patocka 134748debafeSMikulas Patocka do { 134848debafeSMikulas Patocka wb = list_entry(list->next, struct writeback_struct, endio_entry); 134948debafeSMikulas Patocka list_del(&wb->endio_entry); 135048debafeSMikulas Patocka 135148debafeSMikulas Patocka if (unlikely(wb->bio.bi_status != BLK_STS_OK)) 135248debafeSMikulas Patocka writecache_error(wc, blk_status_to_errno(wb->bio.bi_status), 135348debafeSMikulas Patocka "write error %d", wb->bio.bi_status); 135448debafeSMikulas Patocka i = 0; 135548debafeSMikulas Patocka do { 135648debafeSMikulas Patocka e = wb->wc_list[i]; 135748debafeSMikulas Patocka BUG_ON(!e->write_in_progress); 135848debafeSMikulas Patocka e->write_in_progress = false; 135948debafeSMikulas Patocka INIT_LIST_HEAD(&e->lru); 136048debafeSMikulas Patocka if (!writecache_has_error(wc)) 136148debafeSMikulas Patocka writecache_free_entry(wc, e); 136248debafeSMikulas Patocka BUG_ON(!wc->writeback_size); 136348debafeSMikulas Patocka wc->writeback_size--; 136448debafeSMikulas Patocka n_walked++; 136548debafeSMikulas Patocka if (unlikely(n_walked >= ENDIO_LATENCY)) { 1366aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 
136748debafeSMikulas Patocka wc_unlock(wc); 136848debafeSMikulas Patocka wc_lock(wc); 136948debafeSMikulas Patocka n_walked = 0; 137048debafeSMikulas Patocka } 137148debafeSMikulas Patocka } while (++i < wb->wc_list_n); 137248debafeSMikulas Patocka 137348debafeSMikulas Patocka if (wb->wc_list != wb->wc_list_inline) 137448debafeSMikulas Patocka kfree(wb->wc_list); 137548debafeSMikulas Patocka bio_put(&wb->bio); 137648debafeSMikulas Patocka } while (!list_empty(list)); 137748debafeSMikulas Patocka } 137848debafeSMikulas Patocka 137948debafeSMikulas Patocka static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list) 138048debafeSMikulas Patocka { 138148debafeSMikulas Patocka struct copy_struct *c; 138248debafeSMikulas Patocka struct wc_entry *e; 138348debafeSMikulas Patocka 138448debafeSMikulas Patocka do { 138548debafeSMikulas Patocka c = list_entry(list->next, struct copy_struct, endio_entry); 138648debafeSMikulas Patocka list_del(&c->endio_entry); 138748debafeSMikulas Patocka 138848debafeSMikulas Patocka if (unlikely(c->error)) 138948debafeSMikulas Patocka writecache_error(wc, c->error, "copy error"); 139048debafeSMikulas Patocka 139148debafeSMikulas Patocka e = c->e; 139248debafeSMikulas Patocka do { 139348debafeSMikulas Patocka BUG_ON(!e->write_in_progress); 139448debafeSMikulas Patocka e->write_in_progress = false; 139548debafeSMikulas Patocka INIT_LIST_HEAD(&e->lru); 139648debafeSMikulas Patocka if (!writecache_has_error(wc)) 139748debafeSMikulas Patocka writecache_free_entry(wc, e); 139848debafeSMikulas Patocka 139948debafeSMikulas Patocka BUG_ON(!wc->writeback_size); 140048debafeSMikulas Patocka wc->writeback_size--; 140148debafeSMikulas Patocka e++; 140248debafeSMikulas Patocka } while (--c->n_entries); 140348debafeSMikulas Patocka mempool_free(c, &wc->copy_pool); 140448debafeSMikulas Patocka } while (!list_empty(list)); 140548debafeSMikulas Patocka } 140648debafeSMikulas Patocka 140748debafeSMikulas Patocka static int 
writecache_endio_thread(void *data) 140848debafeSMikulas Patocka { 140948debafeSMikulas Patocka struct dm_writecache *wc = data; 141048debafeSMikulas Patocka 141148debafeSMikulas Patocka while (1) { 141248debafeSMikulas Patocka struct list_head list; 141348debafeSMikulas Patocka 141448debafeSMikulas Patocka raw_spin_lock_irq(&wc->endio_list_lock); 141548debafeSMikulas Patocka if (!list_empty(&wc->endio_list)) 141648debafeSMikulas Patocka goto pop_from_list; 141748debafeSMikulas Patocka set_current_state(TASK_INTERRUPTIBLE); 141848debafeSMikulas Patocka raw_spin_unlock_irq(&wc->endio_list_lock); 141948debafeSMikulas Patocka 142048debafeSMikulas Patocka if (unlikely(kthread_should_stop())) { 142148debafeSMikulas Patocka set_current_state(TASK_RUNNING); 142248debafeSMikulas Patocka break; 142348debafeSMikulas Patocka } 142448debafeSMikulas Patocka 142548debafeSMikulas Patocka schedule(); 142648debafeSMikulas Patocka 142748debafeSMikulas Patocka continue; 142848debafeSMikulas Patocka 142948debafeSMikulas Patocka pop_from_list: 143048debafeSMikulas Patocka list = wc->endio_list; 143148debafeSMikulas Patocka list.next->prev = list.prev->next = &list; 143248debafeSMikulas Patocka INIT_LIST_HEAD(&wc->endio_list); 143348debafeSMikulas Patocka raw_spin_unlock_irq(&wc->endio_list_lock); 143448debafeSMikulas Patocka 143548debafeSMikulas Patocka if (!WC_MODE_FUA(wc)) 143648debafeSMikulas Patocka writecache_disk_flush(wc, wc->dev); 143748debafeSMikulas Patocka 143848debafeSMikulas Patocka wc_lock(wc); 143948debafeSMikulas Patocka 144048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 144148debafeSMikulas Patocka __writecache_endio_pmem(wc, &list); 144248debafeSMikulas Patocka } else { 144348debafeSMikulas Patocka __writecache_endio_ssd(wc, &list); 144448debafeSMikulas Patocka writecache_wait_for_ios(wc, READ); 144548debafeSMikulas Patocka } 144648debafeSMikulas Patocka 1447aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 144848debafeSMikulas Patocka 
144948debafeSMikulas Patocka wc_unlock(wc); 145048debafeSMikulas Patocka } 145148debafeSMikulas Patocka 145248debafeSMikulas Patocka return 0; 145348debafeSMikulas Patocka } 145448debafeSMikulas Patocka 145548debafeSMikulas Patocka static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp) 145648debafeSMikulas Patocka { 145748debafeSMikulas Patocka struct dm_writecache *wc = wb->wc; 145848debafeSMikulas Patocka unsigned block_size = wc->block_size; 145948debafeSMikulas Patocka void *address = memory_data(wc, e); 146048debafeSMikulas Patocka 146148debafeSMikulas Patocka persistent_memory_flush_cache(address, block_size); 146248debafeSMikulas Patocka return bio_add_page(&wb->bio, persistent_memory_page(address), 146348debafeSMikulas Patocka block_size, persistent_memory_page_offset(address)) != 0; 146448debafeSMikulas Patocka } 146548debafeSMikulas Patocka 146648debafeSMikulas Patocka struct writeback_list { 146748debafeSMikulas Patocka struct list_head list; 146848debafeSMikulas Patocka size_t size; 146948debafeSMikulas Patocka }; 147048debafeSMikulas Patocka 147148debafeSMikulas Patocka static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl) 147248debafeSMikulas Patocka { 147348debafeSMikulas Patocka if (unlikely(wc->max_writeback_jobs)) { 147448debafeSMikulas Patocka if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) { 147548debafeSMikulas Patocka wc_lock(wc); 147648debafeSMikulas Patocka while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs) 147748debafeSMikulas Patocka writecache_wait_on_freelist(wc); 147848debafeSMikulas Patocka wc_unlock(wc); 147948debafeSMikulas Patocka } 148048debafeSMikulas Patocka } 148148debafeSMikulas Patocka cond_resched(); 148248debafeSMikulas Patocka } 148348debafeSMikulas Patocka 148448debafeSMikulas Patocka static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl) 148548debafeSMikulas Patocka { 
148648debafeSMikulas Patocka struct wc_entry *e, *f; 148748debafeSMikulas Patocka struct bio *bio; 148848debafeSMikulas Patocka struct writeback_struct *wb; 148948debafeSMikulas Patocka unsigned max_pages; 149048debafeSMikulas Patocka 149148debafeSMikulas Patocka while (wbl->size) { 149248debafeSMikulas Patocka wbl->size--; 149348debafeSMikulas Patocka e = container_of(wbl->list.prev, struct wc_entry, lru); 149448debafeSMikulas Patocka list_del(&e->lru); 149548debafeSMikulas Patocka 149648debafeSMikulas Patocka max_pages = e->wc_list_contiguous; 149748debafeSMikulas Patocka 149848debafeSMikulas Patocka bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set); 149948debafeSMikulas Patocka wb = container_of(bio, struct writeback_struct, bio); 150048debafeSMikulas Patocka wb->wc = wc; 150109f2d656SHuaisheng Ye bio->bi_end_io = writecache_writeback_endio; 150209f2d656SHuaisheng Ye bio_set_dev(bio, wc->dev->bdev); 150309f2d656SHuaisheng Ye bio->bi_iter.bi_sector = read_original_sector(wc, e); 150448debafeSMikulas Patocka if (max_pages <= WB_LIST_INLINE || 150550a7d3baSKees Cook unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), 150648debafeSMikulas Patocka GFP_NOIO | __GFP_NORETRY | 150748debafeSMikulas Patocka __GFP_NOMEMALLOC | __GFP_NOWARN)))) { 150848debafeSMikulas Patocka wb->wc_list = wb->wc_list_inline; 150948debafeSMikulas Patocka max_pages = WB_LIST_INLINE; 151048debafeSMikulas Patocka } 151148debafeSMikulas Patocka 151248debafeSMikulas Patocka BUG_ON(!wc_add_block(wb, e, GFP_NOIO)); 151348debafeSMikulas Patocka 151448debafeSMikulas Patocka wb->wc_list[0] = e; 151548debafeSMikulas Patocka wb->wc_list_n = 1; 151648debafeSMikulas Patocka 151748debafeSMikulas Patocka while (wbl->size && wb->wc_list_n < max_pages) { 151848debafeSMikulas Patocka f = container_of(wbl->list.prev, struct wc_entry, lru); 151948debafeSMikulas Patocka if (read_original_sector(wc, f) != 152048debafeSMikulas Patocka read_original_sector(wc, e) + (wc->block_size >> 
SECTOR_SHIFT)) 152148debafeSMikulas Patocka break; 152248debafeSMikulas Patocka if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN)) 152348debafeSMikulas Patocka break; 152448debafeSMikulas Patocka wbl->size--; 152548debafeSMikulas Patocka list_del(&f->lru); 152648debafeSMikulas Patocka wb->wc_list[wb->wc_list_n++] = f; 152748debafeSMikulas Patocka e = f; 152848debafeSMikulas Patocka } 152909f2d656SHuaisheng Ye bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA); 153048debafeSMikulas Patocka if (writecache_has_error(wc)) { 153148debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 153209f2d656SHuaisheng Ye bio_endio(bio); 153348debafeSMikulas Patocka } else { 153409f2d656SHuaisheng Ye submit_bio(bio); 153548debafeSMikulas Patocka } 153648debafeSMikulas Patocka 153748debafeSMikulas Patocka __writeback_throttle(wc, wbl); 153848debafeSMikulas Patocka } 153948debafeSMikulas Patocka } 154048debafeSMikulas Patocka 154148debafeSMikulas Patocka static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl) 154248debafeSMikulas Patocka { 154348debafeSMikulas Patocka struct wc_entry *e, *f; 154448debafeSMikulas Patocka struct dm_io_region from, to; 154548debafeSMikulas Patocka struct copy_struct *c; 154648debafeSMikulas Patocka 154748debafeSMikulas Patocka while (wbl->size) { 154848debafeSMikulas Patocka unsigned n_sectors; 154948debafeSMikulas Patocka 155048debafeSMikulas Patocka wbl->size--; 155148debafeSMikulas Patocka e = container_of(wbl->list.prev, struct wc_entry, lru); 155248debafeSMikulas Patocka list_del(&e->lru); 155348debafeSMikulas Patocka 155448debafeSMikulas Patocka n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT); 155548debafeSMikulas Patocka 155648debafeSMikulas Patocka from.bdev = wc->ssd_dev->bdev; 155748debafeSMikulas Patocka from.sector = cache_sector(wc, e); 155848debafeSMikulas Patocka from.count = n_sectors; 155948debafeSMikulas Patocka to.bdev = wc->dev->bdev; 156048debafeSMikulas 
Patocka to.sector = read_original_sector(wc, e); 156148debafeSMikulas Patocka to.count = n_sectors; 156248debafeSMikulas Patocka 156348debafeSMikulas Patocka c = mempool_alloc(&wc->copy_pool, GFP_NOIO); 156448debafeSMikulas Patocka c->wc = wc; 156548debafeSMikulas Patocka c->e = e; 156648debafeSMikulas Patocka c->n_entries = e->wc_list_contiguous; 156748debafeSMikulas Patocka 156848debafeSMikulas Patocka while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) { 156948debafeSMikulas Patocka wbl->size--; 157048debafeSMikulas Patocka f = container_of(wbl->list.prev, struct wc_entry, lru); 157148debafeSMikulas Patocka BUG_ON(f != e + 1); 157248debafeSMikulas Patocka list_del(&f->lru); 157348debafeSMikulas Patocka e = f; 157448debafeSMikulas Patocka } 157548debafeSMikulas Patocka 157648debafeSMikulas Patocka dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c); 157748debafeSMikulas Patocka 157848debafeSMikulas Patocka __writeback_throttle(wc, wbl); 157948debafeSMikulas Patocka } 158048debafeSMikulas Patocka } 158148debafeSMikulas Patocka 158248debafeSMikulas Patocka static void writecache_writeback(struct work_struct *work) 158348debafeSMikulas Patocka { 158448debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work); 158548debafeSMikulas Patocka struct blk_plug plug; 15868dd85873SMikulas Patocka struct wc_entry *f, *uninitialized_var(g), *e = NULL; 158748debafeSMikulas Patocka struct rb_node *node, *next_node; 158848debafeSMikulas Patocka struct list_head skipped; 158948debafeSMikulas Patocka struct writeback_list wbl; 159048debafeSMikulas Patocka unsigned long n_walked; 159148debafeSMikulas Patocka 159248debafeSMikulas Patocka wc_lock(wc); 159348debafeSMikulas Patocka restart: 159448debafeSMikulas Patocka if (writecache_has_error(wc)) { 159548debafeSMikulas Patocka wc_unlock(wc); 159648debafeSMikulas Patocka return; 159748debafeSMikulas Patocka } 159848debafeSMikulas Patocka 159948debafeSMikulas 
Patocka if (unlikely(wc->writeback_all)) { 160048debafeSMikulas Patocka if (writecache_wait_for_writeback(wc)) 160148debafeSMikulas Patocka goto restart; 160248debafeSMikulas Patocka } 160348debafeSMikulas Patocka 160448debafeSMikulas Patocka if (wc->overwrote_committed) { 160548debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 160648debafeSMikulas Patocka } 160748debafeSMikulas Patocka 160848debafeSMikulas Patocka n_walked = 0; 160948debafeSMikulas Patocka INIT_LIST_HEAD(&skipped); 161048debafeSMikulas Patocka INIT_LIST_HEAD(&wbl.list); 161148debafeSMikulas Patocka wbl.size = 0; 161248debafeSMikulas Patocka while (!list_empty(&wc->lru) && 161348debafeSMikulas Patocka (wc->writeback_all || 161448debafeSMikulas Patocka wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark)) { 161548debafeSMikulas Patocka 161648debafeSMikulas Patocka n_walked++; 161748debafeSMikulas Patocka if (unlikely(n_walked > WRITEBACK_LATENCY) && 161848debafeSMikulas Patocka likely(!wc->writeback_all) && likely(!dm_suspended(wc->ti))) { 161948debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 162048debafeSMikulas Patocka break; 162148debafeSMikulas Patocka } 162248debafeSMikulas Patocka 16235229b489SHuaisheng Ye if (unlikely(wc->writeback_all)) { 16245229b489SHuaisheng Ye if (unlikely(!e)) { 16255229b489SHuaisheng Ye writecache_flush(wc); 16265229b489SHuaisheng Ye e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node); 16275229b489SHuaisheng Ye } else 16285229b489SHuaisheng Ye e = g; 16295229b489SHuaisheng Ye } else 163048debafeSMikulas Patocka e = container_of(wc->lru.prev, struct wc_entry, lru); 163148debafeSMikulas Patocka BUG_ON(e->write_in_progress); 163248debafeSMikulas Patocka if (unlikely(!writecache_entry_is_committed(wc, e))) { 163348debafeSMikulas Patocka writecache_flush(wc); 163448debafeSMikulas Patocka } 163548debafeSMikulas Patocka node = rb_prev(&e->rb_node); 163648debafeSMikulas Patocka if (node) { 163748debafeSMikulas 
Patocka f = container_of(node, struct wc_entry, rb_node); 163848debafeSMikulas Patocka if (unlikely(read_original_sector(wc, f) == 163948debafeSMikulas Patocka read_original_sector(wc, e))) { 164048debafeSMikulas Patocka BUG_ON(!f->write_in_progress); 164148debafeSMikulas Patocka list_del(&e->lru); 164248debafeSMikulas Patocka list_add(&e->lru, &skipped); 164348debafeSMikulas Patocka cond_resched(); 164448debafeSMikulas Patocka continue; 164548debafeSMikulas Patocka } 164648debafeSMikulas Patocka } 164748debafeSMikulas Patocka wc->writeback_size++; 164848debafeSMikulas Patocka list_del(&e->lru); 164948debafeSMikulas Patocka list_add(&e->lru, &wbl.list); 165048debafeSMikulas Patocka wbl.size++; 165148debafeSMikulas Patocka e->write_in_progress = true; 165248debafeSMikulas Patocka e->wc_list_contiguous = 1; 165348debafeSMikulas Patocka 165448debafeSMikulas Patocka f = e; 165548debafeSMikulas Patocka 165648debafeSMikulas Patocka while (1) { 165748debafeSMikulas Patocka next_node = rb_next(&f->rb_node); 165848debafeSMikulas Patocka if (unlikely(!next_node)) 165948debafeSMikulas Patocka break; 166048debafeSMikulas Patocka g = container_of(next_node, struct wc_entry, rb_node); 166162421b38SHuaisheng Ye if (unlikely(read_original_sector(wc, g) == 166262421b38SHuaisheng Ye read_original_sector(wc, f))) { 166348debafeSMikulas Patocka f = g; 166448debafeSMikulas Patocka continue; 166548debafeSMikulas Patocka } 166648debafeSMikulas Patocka if (read_original_sector(wc, g) != 166748debafeSMikulas Patocka read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT)) 166848debafeSMikulas Patocka break; 166948debafeSMikulas Patocka if (unlikely(g->write_in_progress)) 167048debafeSMikulas Patocka break; 167148debafeSMikulas Patocka if (unlikely(!writecache_entry_is_committed(wc, g))) 167248debafeSMikulas Patocka break; 167348debafeSMikulas Patocka 167448debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) { 167548debafeSMikulas Patocka if (g != f + 1) 167648debafeSMikulas Patocka break; 
167748debafeSMikulas Patocka } 167848debafeSMikulas Patocka 167948debafeSMikulas Patocka n_walked++; 168048debafeSMikulas Patocka //if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all)) 168148debafeSMikulas Patocka // break; 168248debafeSMikulas Patocka 168348debafeSMikulas Patocka wc->writeback_size++; 168448debafeSMikulas Patocka list_del(&g->lru); 168548debafeSMikulas Patocka list_add(&g->lru, &wbl.list); 168648debafeSMikulas Patocka wbl.size++; 168748debafeSMikulas Patocka g->write_in_progress = true; 168848debafeSMikulas Patocka g->wc_list_contiguous = BIO_MAX_PAGES; 168948debafeSMikulas Patocka f = g; 169048debafeSMikulas Patocka e->wc_list_contiguous++; 16915229b489SHuaisheng Ye if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) { 16925229b489SHuaisheng Ye if (unlikely(wc->writeback_all)) { 16935229b489SHuaisheng Ye next_node = rb_next(&f->rb_node); 16945229b489SHuaisheng Ye if (likely(next_node)) 16955229b489SHuaisheng Ye g = container_of(next_node, struct wc_entry, rb_node); 16965229b489SHuaisheng Ye } 169748debafeSMikulas Patocka break; 169848debafeSMikulas Patocka } 16995229b489SHuaisheng Ye } 170048debafeSMikulas Patocka cond_resched(); 170148debafeSMikulas Patocka } 170248debafeSMikulas Patocka 170348debafeSMikulas Patocka if (!list_empty(&skipped)) { 170448debafeSMikulas Patocka list_splice_tail(&skipped, &wc->lru); 170548debafeSMikulas Patocka /* 170648debafeSMikulas Patocka * If we didn't do any progress, we must wait until some 170748debafeSMikulas Patocka * writeback finishes to avoid burning CPU in a loop 170848debafeSMikulas Patocka */ 170948debafeSMikulas Patocka if (unlikely(!wbl.size)) 171048debafeSMikulas Patocka writecache_wait_for_writeback(wc); 171148debafeSMikulas Patocka } 171248debafeSMikulas Patocka 171348debafeSMikulas Patocka wc_unlock(wc); 171448debafeSMikulas Patocka 171548debafeSMikulas Patocka blk_start_plug(&plug); 171648debafeSMikulas Patocka 171748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 
171848debafeSMikulas Patocka __writecache_writeback_pmem(wc, &wbl); 171948debafeSMikulas Patocka else 172048debafeSMikulas Patocka __writecache_writeback_ssd(wc, &wbl); 172148debafeSMikulas Patocka 172248debafeSMikulas Patocka blk_finish_plug(&plug); 172348debafeSMikulas Patocka 172448debafeSMikulas Patocka if (unlikely(wc->writeback_all)) { 172548debafeSMikulas Patocka wc_lock(wc); 172648debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 172748debafeSMikulas Patocka wc_unlock(wc); 172848debafeSMikulas Patocka } 172948debafeSMikulas Patocka } 173048debafeSMikulas Patocka 173148debafeSMikulas Patocka static int calculate_memory_size(uint64_t device_size, unsigned block_size, 173248debafeSMikulas Patocka size_t *n_blocks_p, size_t *n_metadata_blocks_p) 173348debafeSMikulas Patocka { 173448debafeSMikulas Patocka uint64_t n_blocks, offset; 173548debafeSMikulas Patocka struct wc_entry e; 173648debafeSMikulas Patocka 173748debafeSMikulas Patocka n_blocks = device_size; 173848debafeSMikulas Patocka do_div(n_blocks, block_size + sizeof(struct wc_memory_entry)); 173948debafeSMikulas Patocka 174048debafeSMikulas Patocka while (1) { 174148debafeSMikulas Patocka if (!n_blocks) 174248debafeSMikulas Patocka return -ENOSPC; 174348debafeSMikulas Patocka /* Verify the following entries[n_blocks] won't overflow */ 174448debafeSMikulas Patocka if (n_blocks >= ((size_t)-sizeof(struct wc_memory_superblock) / 174548debafeSMikulas Patocka sizeof(struct wc_memory_entry))) 174648debafeSMikulas Patocka return -EFBIG; 174748debafeSMikulas Patocka offset = offsetof(struct wc_memory_superblock, entries[n_blocks]); 174848debafeSMikulas Patocka offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1); 174948debafeSMikulas Patocka if (offset + n_blocks * block_size <= device_size) 175048debafeSMikulas Patocka break; 175148debafeSMikulas Patocka n_blocks--; 175248debafeSMikulas Patocka } 175348debafeSMikulas Patocka 175448debafeSMikulas Patocka /* check if the bit field 
overflows */ 175548debafeSMikulas Patocka e.index = n_blocks; 175648debafeSMikulas Patocka if (e.index != n_blocks) 175748debafeSMikulas Patocka return -EFBIG; 175848debafeSMikulas Patocka 175948debafeSMikulas Patocka if (n_blocks_p) 176048debafeSMikulas Patocka *n_blocks_p = n_blocks; 176148debafeSMikulas Patocka if (n_metadata_blocks_p) 176248debafeSMikulas Patocka *n_metadata_blocks_p = offset >> __ffs(block_size); 176348debafeSMikulas Patocka return 0; 176448debafeSMikulas Patocka } 176548debafeSMikulas Patocka 176648debafeSMikulas Patocka static int init_memory(struct dm_writecache *wc) 176748debafeSMikulas Patocka { 176848debafeSMikulas Patocka size_t b; 176948debafeSMikulas Patocka int r; 177048debafeSMikulas Patocka 177148debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL); 177248debafeSMikulas Patocka if (r) 177348debafeSMikulas Patocka return r; 177448debafeSMikulas Patocka 177548debafeSMikulas Patocka r = writecache_alloc_entries(wc); 177648debafeSMikulas Patocka if (r) 177748debafeSMikulas Patocka return r; 177848debafeSMikulas Patocka 177948debafeSMikulas Patocka for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++) 178048debafeSMikulas Patocka pmem_assign(sb(wc)->padding[b], cpu_to_le64(0)); 178148debafeSMikulas Patocka pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION)); 178248debafeSMikulas Patocka pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size)); 178348debafeSMikulas Patocka pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks)); 178448debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(0)); 178548debafeSMikulas Patocka 178648debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) 178748debafeSMikulas Patocka write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); 178848debafeSMikulas Patocka 178948debafeSMikulas Patocka writecache_flush_all_metadata(wc); 1790aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 179148debafeSMikulas 
Patocka pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); 179248debafeSMikulas Patocka writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); 1793aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 179448debafeSMikulas Patocka 179548debafeSMikulas Patocka return 0; 179648debafeSMikulas Patocka } 179748debafeSMikulas Patocka 179848debafeSMikulas Patocka static void writecache_dtr(struct dm_target *ti) 179948debafeSMikulas Patocka { 180048debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 180148debafeSMikulas Patocka 180248debafeSMikulas Patocka if (!wc) 180348debafeSMikulas Patocka return; 180448debafeSMikulas Patocka 180548debafeSMikulas Patocka if (wc->endio_thread) 180648debafeSMikulas Patocka kthread_stop(wc->endio_thread); 180748debafeSMikulas Patocka 180848debafeSMikulas Patocka if (wc->flush_thread) 180948debafeSMikulas Patocka kthread_stop(wc->flush_thread); 181048debafeSMikulas Patocka 181148debafeSMikulas Patocka bioset_exit(&wc->bio_set); 181248debafeSMikulas Patocka 181348debafeSMikulas Patocka mempool_exit(&wc->copy_pool); 181448debafeSMikulas Patocka 181548debafeSMikulas Patocka if (wc->writeback_wq) 181648debafeSMikulas Patocka destroy_workqueue(wc->writeback_wq); 181748debafeSMikulas Patocka 181848debafeSMikulas Patocka if (wc->dev) 181948debafeSMikulas Patocka dm_put_device(ti, wc->dev); 182048debafeSMikulas Patocka 182148debafeSMikulas Patocka if (wc->ssd_dev) 182248debafeSMikulas Patocka dm_put_device(ti, wc->ssd_dev); 182348debafeSMikulas Patocka 182448debafeSMikulas Patocka if (wc->entries) 182548debafeSMikulas Patocka vfree(wc->entries); 182648debafeSMikulas Patocka 182748debafeSMikulas Patocka if (wc->memory_map) { 182848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 182948debafeSMikulas Patocka persistent_memory_release(wc); 183048debafeSMikulas Patocka else 183148debafeSMikulas Patocka vfree(wc->memory_map); 183248debafeSMikulas Patocka } 183348debafeSMikulas Patocka 183448debafeSMikulas 
Patocka if (wc->dm_kcopyd) 183548debafeSMikulas Patocka dm_kcopyd_client_destroy(wc->dm_kcopyd); 183648debafeSMikulas Patocka 183748debafeSMikulas Patocka if (wc->dm_io) 183848debafeSMikulas Patocka dm_io_client_destroy(wc->dm_io); 183948debafeSMikulas Patocka 184048debafeSMikulas Patocka if (wc->dirty_bitmap) 184148debafeSMikulas Patocka vfree(wc->dirty_bitmap); 184248debafeSMikulas Patocka 184348debafeSMikulas Patocka kfree(wc); 184448debafeSMikulas Patocka } 184548debafeSMikulas Patocka 184648debafeSMikulas Patocka static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) 184748debafeSMikulas Patocka { 184848debafeSMikulas Patocka struct dm_writecache *wc; 184948debafeSMikulas Patocka struct dm_arg_set as; 185048debafeSMikulas Patocka const char *string; 185148debafeSMikulas Patocka unsigned opt_params; 185248debafeSMikulas Patocka size_t offset, data_size; 185348debafeSMikulas Patocka int i, r; 185448debafeSMikulas Patocka char dummy; 185548debafeSMikulas Patocka int high_wm_percent = HIGH_WATERMARK; 185648debafeSMikulas Patocka int low_wm_percent = LOW_WATERMARK; 185748debafeSMikulas Patocka uint64_t x; 185848debafeSMikulas Patocka struct wc_memory_superblock s; 185948debafeSMikulas Patocka 186048debafeSMikulas Patocka static struct dm_arg _args[] = { 186148debafeSMikulas Patocka {0, 10, "Invalid number of feature args"}, 186248debafeSMikulas Patocka }; 186348debafeSMikulas Patocka 186448debafeSMikulas Patocka as.argc = argc; 186548debafeSMikulas Patocka as.argv = argv; 186648debafeSMikulas Patocka 186748debafeSMikulas Patocka wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL); 186848debafeSMikulas Patocka if (!wc) { 186948debafeSMikulas Patocka ti->error = "Cannot allocate writecache structure"; 187048debafeSMikulas Patocka r = -ENOMEM; 187148debafeSMikulas Patocka goto bad; 187248debafeSMikulas Patocka } 187348debafeSMikulas Patocka ti->private = wc; 187448debafeSMikulas Patocka wc->ti = ti; 187548debafeSMikulas Patocka 
187648debafeSMikulas Patocka mutex_init(&wc->lock); 187748debafeSMikulas Patocka writecache_poison_lists(wc); 187848debafeSMikulas Patocka init_waitqueue_head(&wc->freelist_wait); 187948debafeSMikulas Patocka timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0); 188048debafeSMikulas Patocka 188148debafeSMikulas Patocka for (i = 0; i < 2; i++) { 188248debafeSMikulas Patocka atomic_set(&wc->bio_in_progress[i], 0); 188348debafeSMikulas Patocka init_waitqueue_head(&wc->bio_in_progress_wait[i]); 188448debafeSMikulas Patocka } 188548debafeSMikulas Patocka 188648debafeSMikulas Patocka wc->dm_io = dm_io_client_create(); 188748debafeSMikulas Patocka if (IS_ERR(wc->dm_io)) { 188848debafeSMikulas Patocka r = PTR_ERR(wc->dm_io); 188948debafeSMikulas Patocka ti->error = "Unable to allocate dm-io client"; 189048debafeSMikulas Patocka wc->dm_io = NULL; 189148debafeSMikulas Patocka goto bad; 189248debafeSMikulas Patocka } 189348debafeSMikulas Patocka 1894f87e033bSHuaisheng Ye wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); 189548debafeSMikulas Patocka if (!wc->writeback_wq) { 189648debafeSMikulas Patocka r = -ENOMEM; 189748debafeSMikulas Patocka ti->error = "Could not allocate writeback workqueue"; 189848debafeSMikulas Patocka goto bad; 189948debafeSMikulas Patocka } 190048debafeSMikulas Patocka INIT_WORK(&wc->writeback_work, writecache_writeback); 190148debafeSMikulas Patocka INIT_WORK(&wc->flush_work, writecache_flush_work); 190248debafeSMikulas Patocka 190348debafeSMikulas Patocka raw_spin_lock_init(&wc->endio_list_lock); 190448debafeSMikulas Patocka INIT_LIST_HEAD(&wc->endio_list); 190548debafeSMikulas Patocka wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio"); 190648debafeSMikulas Patocka if (IS_ERR(wc->endio_thread)) { 190748debafeSMikulas Patocka r = PTR_ERR(wc->endio_thread); 190848debafeSMikulas Patocka wc->endio_thread = NULL; 190948debafeSMikulas Patocka ti->error = "Couldn't spawn endio 
thread"; 191048debafeSMikulas Patocka goto bad; 191148debafeSMikulas Patocka } 191248debafeSMikulas Patocka wake_up_process(wc->endio_thread); 191348debafeSMikulas Patocka 191448debafeSMikulas Patocka /* 191548debafeSMikulas Patocka * Parse the mode (pmem or ssd) 191648debafeSMikulas Patocka */ 191748debafeSMikulas Patocka string = dm_shift_arg(&as); 191848debafeSMikulas Patocka if (!string) 191948debafeSMikulas Patocka goto bad_arguments; 192048debafeSMikulas Patocka 192148debafeSMikulas Patocka if (!strcasecmp(string, "s")) { 192248debafeSMikulas Patocka wc->pmem_mode = false; 192348debafeSMikulas Patocka } else if (!strcasecmp(string, "p")) { 192448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 192548debafeSMikulas Patocka wc->pmem_mode = true; 192648debafeSMikulas Patocka wc->writeback_fua = true; 192748debafeSMikulas Patocka #else 192848debafeSMikulas Patocka /* 192948debafeSMikulas Patocka * If the architecture doesn't support persistent memory or 193048debafeSMikulas Patocka * the kernel doesn't support any DAX drivers, this driver can 193148debafeSMikulas Patocka * only be used in SSD-only mode. 
193248debafeSMikulas Patocka */ 193348debafeSMikulas Patocka r = -EOPNOTSUPP; 193448debafeSMikulas Patocka ti->error = "Persistent memory or DAX not supported on this system"; 193548debafeSMikulas Patocka goto bad; 193648debafeSMikulas Patocka #endif 193748debafeSMikulas Patocka } else { 193848debafeSMikulas Patocka goto bad_arguments; 193948debafeSMikulas Patocka } 194048debafeSMikulas Patocka 194148debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 194248debafeSMikulas Patocka r = bioset_init(&wc->bio_set, BIO_POOL_SIZE, 194348debafeSMikulas Patocka offsetof(struct writeback_struct, bio), 194448debafeSMikulas Patocka BIOSET_NEED_BVECS); 194548debafeSMikulas Patocka if (r) { 194648debafeSMikulas Patocka ti->error = "Could not allocate bio set"; 194748debafeSMikulas Patocka goto bad; 194848debafeSMikulas Patocka } 194948debafeSMikulas Patocka } else { 195048debafeSMikulas Patocka r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct)); 195148debafeSMikulas Patocka if (r) { 195248debafeSMikulas Patocka ti->error = "Could not allocate mempool"; 195348debafeSMikulas Patocka goto bad; 195448debafeSMikulas Patocka } 195548debafeSMikulas Patocka } 195648debafeSMikulas Patocka 195748debafeSMikulas Patocka /* 195848debafeSMikulas Patocka * Parse the origin data device 195948debafeSMikulas Patocka */ 196048debafeSMikulas Patocka string = dm_shift_arg(&as); 196148debafeSMikulas Patocka if (!string) 196248debafeSMikulas Patocka goto bad_arguments; 196348debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev); 196448debafeSMikulas Patocka if (r) { 196548debafeSMikulas Patocka ti->error = "Origin data device lookup failed"; 196648debafeSMikulas Patocka goto bad; 196748debafeSMikulas Patocka } 196848debafeSMikulas Patocka 196948debafeSMikulas Patocka /* 197048debafeSMikulas Patocka * Parse cache data device (be it pmem or ssd) 197148debafeSMikulas Patocka */ 197248debafeSMikulas Patocka string = dm_shift_arg(&as); 
197348debafeSMikulas Patocka if (!string) 197448debafeSMikulas Patocka goto bad_arguments; 197548debafeSMikulas Patocka 197648debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev); 197748debafeSMikulas Patocka if (r) { 197848debafeSMikulas Patocka ti->error = "Cache data device lookup failed"; 197948debafeSMikulas Patocka goto bad; 198048debafeSMikulas Patocka } 198148debafeSMikulas Patocka wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode); 198248debafeSMikulas Patocka 198348debafeSMikulas Patocka /* 198448debafeSMikulas Patocka * Parse the cache block size 198548debafeSMikulas Patocka */ 198648debafeSMikulas Patocka string = dm_shift_arg(&as); 198748debafeSMikulas Patocka if (!string) 198848debafeSMikulas Patocka goto bad_arguments; 198948debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 || 199048debafeSMikulas Patocka wc->block_size < 512 || wc->block_size > PAGE_SIZE || 199148debafeSMikulas Patocka (wc->block_size & (wc->block_size - 1))) { 199248debafeSMikulas Patocka r = -EINVAL; 199348debafeSMikulas Patocka ti->error = "Invalid block size"; 199448debafeSMikulas Patocka goto bad; 199548debafeSMikulas Patocka } 199648debafeSMikulas Patocka wc->block_size_bits = __ffs(wc->block_size); 199748debafeSMikulas Patocka 199848debafeSMikulas Patocka wc->max_writeback_jobs = MAX_WRITEBACK_JOBS; 199948debafeSMikulas Patocka wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? 
AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM; 200048debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC); 200148debafeSMikulas Patocka 200248debafeSMikulas Patocka /* 200348debafeSMikulas Patocka * Parse optional arguments 200448debafeSMikulas Patocka */ 200548debafeSMikulas Patocka r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); 200648debafeSMikulas Patocka if (r) 200748debafeSMikulas Patocka goto bad; 200848debafeSMikulas Patocka 200948debafeSMikulas Patocka while (opt_params) { 201048debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2011d284f824SMikulas Patocka if (!strcasecmp(string, "start_sector") && opt_params >= 1) { 2012d284f824SMikulas Patocka unsigned long long start_sector; 2013d284f824SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2014d284f824SMikulas Patocka if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) 2015d284f824SMikulas Patocka goto invalid_optional; 2016d284f824SMikulas Patocka wc->start_sector = start_sector; 2017d284f824SMikulas Patocka if (wc->start_sector != start_sector || 2018d284f824SMikulas Patocka wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) 2019d284f824SMikulas Patocka goto invalid_optional; 2020d284f824SMikulas Patocka } else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) { 202148debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 202248debafeSMikulas Patocka if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1) 202348debafeSMikulas Patocka goto invalid_optional; 202448debafeSMikulas Patocka if (high_wm_percent < 0 || high_wm_percent > 100) 202548debafeSMikulas Patocka goto invalid_optional; 202648debafeSMikulas Patocka wc->high_wm_percent_set = true; 202748debafeSMikulas Patocka } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { 202848debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 202948debafeSMikulas Patocka if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1) 
203048debafeSMikulas Patocka goto invalid_optional; 203148debafeSMikulas Patocka if (low_wm_percent < 0 || low_wm_percent > 100) 203248debafeSMikulas Patocka goto invalid_optional; 203348debafeSMikulas Patocka wc->low_wm_percent_set = true; 203448debafeSMikulas Patocka } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { 203548debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 203648debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1) 203748debafeSMikulas Patocka goto invalid_optional; 203848debafeSMikulas Patocka wc->max_writeback_jobs_set = true; 203948debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) { 204048debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 204148debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1) 204248debafeSMikulas Patocka goto invalid_optional; 204348debafeSMikulas Patocka wc->autocommit_blocks_set = true; 204448debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) { 204548debafeSMikulas Patocka unsigned autocommit_msecs; 204648debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 204748debafeSMikulas Patocka if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1) 204848debafeSMikulas Patocka goto invalid_optional; 204948debafeSMikulas Patocka if (autocommit_msecs > 3600000) 205048debafeSMikulas Patocka goto invalid_optional; 205148debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); 205248debafeSMikulas Patocka wc->autocommit_time_set = true; 205348debafeSMikulas Patocka } else if (!strcasecmp(string, "fua")) { 205448debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 205548debafeSMikulas Patocka wc->writeback_fua = true; 205648debafeSMikulas Patocka wc->writeback_fua_set = true; 205748debafeSMikulas Patocka } else goto invalid_optional; 205848debafeSMikulas Patocka } else if (!strcasecmp(string, 
"nofua")) { 205948debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 206048debafeSMikulas Patocka wc->writeback_fua = false; 206148debafeSMikulas Patocka wc->writeback_fua_set = true; 206248debafeSMikulas Patocka } else goto invalid_optional; 206348debafeSMikulas Patocka } else { 206448debafeSMikulas Patocka invalid_optional: 206548debafeSMikulas Patocka r = -EINVAL; 206648debafeSMikulas Patocka ti->error = "Invalid optional argument"; 206748debafeSMikulas Patocka goto bad; 206848debafeSMikulas Patocka } 206948debafeSMikulas Patocka } 207048debafeSMikulas Patocka 207148debafeSMikulas Patocka if (high_wm_percent < low_wm_percent) { 207248debafeSMikulas Patocka r = -EINVAL; 207348debafeSMikulas Patocka ti->error = "High watermark must be greater than or equal to low watermark"; 207448debafeSMikulas Patocka goto bad; 207548debafeSMikulas Patocka } 207648debafeSMikulas Patocka 2077d284f824SMikulas Patocka if (WC_MODE_PMEM(wc)) { 2078d284f824SMikulas Patocka r = persistent_memory_claim(wc); 2079d284f824SMikulas Patocka if (r) { 2080d284f824SMikulas Patocka ti->error = "Unable to map persistent memory for cache"; 2081d284f824SMikulas Patocka goto bad; 2082d284f824SMikulas Patocka } 2083d284f824SMikulas Patocka } else { 208448debafeSMikulas Patocka struct dm_io_region region; 208548debafeSMikulas Patocka struct dm_io_request req; 208648debafeSMikulas Patocka size_t n_blocks, n_metadata_blocks; 208748debafeSMikulas Patocka uint64_t n_bitmap_bits; 208848debafeSMikulas Patocka 2089d284f824SMikulas Patocka wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT; 2090d284f824SMikulas Patocka 209148debafeSMikulas Patocka bio_list_init(&wc->flush_list); 209248debafeSMikulas Patocka wc->flush_thread = kthread_create(writecache_flush_thread, wc, "dm_writecache_flush"); 209348debafeSMikulas Patocka if (IS_ERR(wc->flush_thread)) { 209448debafeSMikulas Patocka r = PTR_ERR(wc->flush_thread); 209548debafeSMikulas Patocka wc->flush_thread = NULL; 2096e8ea141aSShenghui Wang 
ti->error = "Couldn't spawn flush thread"; 209748debafeSMikulas Patocka goto bad; 209848debafeSMikulas Patocka } 209948debafeSMikulas Patocka wake_up_process(wc->flush_thread); 210048debafeSMikulas Patocka 210148debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, 210248debafeSMikulas Patocka &n_blocks, &n_metadata_blocks); 210348debafeSMikulas Patocka if (r) { 210448debafeSMikulas Patocka ti->error = "Invalid device size"; 210548debafeSMikulas Patocka goto bad; 210648debafeSMikulas Patocka } 210748debafeSMikulas Patocka 210848debafeSMikulas Patocka n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + 210948debafeSMikulas Patocka BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY; 211048debafeSMikulas Patocka /* this is limitation of test_bit functions */ 211148debafeSMikulas Patocka if (n_bitmap_bits > 1U << 31) { 211248debafeSMikulas Patocka r = -EFBIG; 211348debafeSMikulas Patocka ti->error = "Invalid device size"; 211448debafeSMikulas Patocka goto bad; 211548debafeSMikulas Patocka } 211648debafeSMikulas Patocka 211748debafeSMikulas Patocka wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits); 211848debafeSMikulas Patocka if (!wc->memory_map) { 211948debafeSMikulas Patocka r = -ENOMEM; 212048debafeSMikulas Patocka ti->error = "Unable to allocate memory for metadata"; 212148debafeSMikulas Patocka goto bad; 212248debafeSMikulas Patocka } 212348debafeSMikulas Patocka 212448debafeSMikulas Patocka wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle); 212548debafeSMikulas Patocka if (IS_ERR(wc->dm_kcopyd)) { 212648debafeSMikulas Patocka r = PTR_ERR(wc->dm_kcopyd); 212748debafeSMikulas Patocka ti->error = "Unable to allocate dm-kcopyd client"; 212848debafeSMikulas Patocka wc->dm_kcopyd = NULL; 212948debafeSMikulas Patocka goto bad; 213048debafeSMikulas Patocka } 213148debafeSMikulas Patocka 213248debafeSMikulas Patocka wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT); 
213348debafeSMikulas Patocka wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / 213448debafeSMikulas Patocka BITS_PER_LONG * sizeof(unsigned long); 213548debafeSMikulas Patocka wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size); 213648debafeSMikulas Patocka if (!wc->dirty_bitmap) { 213748debafeSMikulas Patocka r = -ENOMEM; 213848debafeSMikulas Patocka ti->error = "Unable to allocate dirty bitmap"; 213948debafeSMikulas Patocka goto bad; 214048debafeSMikulas Patocka } 214148debafeSMikulas Patocka 214248debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 2143d284f824SMikulas Patocka region.sector = wc->start_sector; 214448debafeSMikulas Patocka region.count = wc->metadata_sectors; 214548debafeSMikulas Patocka req.bi_op = REQ_OP_READ; 214648debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 214748debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 214848debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map; 214948debafeSMikulas Patocka req.client = wc->dm_io; 215048debafeSMikulas Patocka req.notify.fn = NULL; 215148debafeSMikulas Patocka 215248debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 215348debafeSMikulas Patocka if (r) { 215448debafeSMikulas Patocka ti->error = "Unable to read metadata"; 215548debafeSMikulas Patocka goto bad; 215648debafeSMikulas Patocka } 215748debafeSMikulas Patocka } 215848debafeSMikulas Patocka 215948debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 216048debafeSMikulas Patocka if (r) { 216148debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 216248debafeSMikulas Patocka goto bad; 216348debafeSMikulas Patocka } 216448debafeSMikulas Patocka if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) { 216548debafeSMikulas Patocka r = init_memory(wc); 216648debafeSMikulas Patocka if (r) { 216748debafeSMikulas Patocka ti->error = "Unable to initialize device"; 216848debafeSMikulas Patocka goto bad; 216948debafeSMikulas Patocka } 217048debafeSMikulas 
Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 217148debafeSMikulas Patocka if (r) { 217248debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 217348debafeSMikulas Patocka goto bad; 217448debafeSMikulas Patocka } 217548debafeSMikulas Patocka } 217648debafeSMikulas Patocka 217748debafeSMikulas Patocka if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) { 217848debafeSMikulas Patocka ti->error = "Invalid magic in the superblock"; 217948debafeSMikulas Patocka r = -EINVAL; 218048debafeSMikulas Patocka goto bad; 218148debafeSMikulas Patocka } 218248debafeSMikulas Patocka 218348debafeSMikulas Patocka if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) { 218448debafeSMikulas Patocka ti->error = "Invalid version in the superblock"; 218548debafeSMikulas Patocka r = -EINVAL; 218648debafeSMikulas Patocka goto bad; 218748debafeSMikulas Patocka } 218848debafeSMikulas Patocka 218948debafeSMikulas Patocka if (le32_to_cpu(s.block_size) != wc->block_size) { 219048debafeSMikulas Patocka ti->error = "Block size does not match superblock"; 219148debafeSMikulas Patocka r = -EINVAL; 219248debafeSMikulas Patocka goto bad; 219348debafeSMikulas Patocka } 219448debafeSMikulas Patocka 219548debafeSMikulas Patocka wc->n_blocks = le64_to_cpu(s.n_blocks); 219648debafeSMikulas Patocka 219748debafeSMikulas Patocka offset = wc->n_blocks * sizeof(struct wc_memory_entry); 219848debafeSMikulas Patocka if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) { 219948debafeSMikulas Patocka overflow: 220048debafeSMikulas Patocka ti->error = "Overflow in size calculation"; 220148debafeSMikulas Patocka r = -EINVAL; 220248debafeSMikulas Patocka goto bad; 220348debafeSMikulas Patocka } 220448debafeSMikulas Patocka offset += sizeof(struct wc_memory_superblock); 220548debafeSMikulas Patocka if (offset < sizeof(struct wc_memory_superblock)) 220648debafeSMikulas Patocka goto overflow; 220748debafeSMikulas Patocka offset = 
(offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1); 220848debafeSMikulas Patocka data_size = wc->n_blocks * (size_t)wc->block_size; 220948debafeSMikulas Patocka if (!offset || (data_size / wc->block_size != wc->n_blocks) || 221048debafeSMikulas Patocka (offset + data_size < offset)) 221148debafeSMikulas Patocka goto overflow; 221248debafeSMikulas Patocka if (offset + data_size > wc->memory_map_size) { 221348debafeSMikulas Patocka ti->error = "Memory area is too small"; 221448debafeSMikulas Patocka r = -EINVAL; 221548debafeSMikulas Patocka goto bad; 221648debafeSMikulas Patocka } 221748debafeSMikulas Patocka 221848debafeSMikulas Patocka wc->metadata_sectors = offset >> SECTOR_SHIFT; 221948debafeSMikulas Patocka wc->block_start = (char *)sb(wc) + offset; 222048debafeSMikulas Patocka 222148debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - high_wm_percent); 222248debafeSMikulas Patocka x += 50; 222348debafeSMikulas Patocka do_div(x, 100); 222448debafeSMikulas Patocka wc->freelist_high_watermark = x; 222548debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - low_wm_percent); 222648debafeSMikulas Patocka x += 50; 222748debafeSMikulas Patocka do_div(x, 100); 222848debafeSMikulas Patocka wc->freelist_low_watermark = x; 222948debafeSMikulas Patocka 223048debafeSMikulas Patocka r = writecache_alloc_entries(wc); 223148debafeSMikulas Patocka if (r) { 223248debafeSMikulas Patocka ti->error = "Cannot allocate memory"; 223348debafeSMikulas Patocka goto bad; 223448debafeSMikulas Patocka } 223548debafeSMikulas Patocka 223648debafeSMikulas Patocka ti->num_flush_bios = 1; 223748debafeSMikulas Patocka ti->flush_supported = true; 223848debafeSMikulas Patocka ti->num_discard_bios = 1; 223948debafeSMikulas Patocka 224048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 224148debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 224248debafeSMikulas Patocka 224348debafeSMikulas Patocka return 0; 224448debafeSMikulas Patocka 
224548debafeSMikulas Patocka bad_arguments: 224648debafeSMikulas Patocka r = -EINVAL; 224748debafeSMikulas Patocka ti->error = "Bad arguments"; 224848debafeSMikulas Patocka bad: 224948debafeSMikulas Patocka writecache_dtr(ti); 225048debafeSMikulas Patocka return r; 225148debafeSMikulas Patocka } 225248debafeSMikulas Patocka 225348debafeSMikulas Patocka static void writecache_status(struct dm_target *ti, status_type_t type, 225448debafeSMikulas Patocka unsigned status_flags, char *result, unsigned maxlen) 225548debafeSMikulas Patocka { 225648debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 225748debafeSMikulas Patocka unsigned extra_args; 225848debafeSMikulas Patocka unsigned sz = 0; 225948debafeSMikulas Patocka uint64_t x; 226048debafeSMikulas Patocka 226148debafeSMikulas Patocka switch (type) { 226248debafeSMikulas Patocka case STATUSTYPE_INFO: 226348debafeSMikulas Patocka DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), 226448debafeSMikulas Patocka (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size, 226548debafeSMikulas Patocka (unsigned long long)wc->writeback_size); 226648debafeSMikulas Patocka break; 226748debafeSMikulas Patocka case STATUSTYPE_TABLE: 226848debafeSMikulas Patocka DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 
'p' : 's', 226948debafeSMikulas Patocka wc->dev->name, wc->ssd_dev->name, wc->block_size); 227048debafeSMikulas Patocka extra_args = 0; 22719ff07e7dSMikulas Patocka if (wc->start_sector) 22729ff07e7dSMikulas Patocka extra_args += 2; 227348debafeSMikulas Patocka if (wc->high_wm_percent_set) 227448debafeSMikulas Patocka extra_args += 2; 227548debafeSMikulas Patocka if (wc->low_wm_percent_set) 227648debafeSMikulas Patocka extra_args += 2; 227748debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 227848debafeSMikulas Patocka extra_args += 2; 227948debafeSMikulas Patocka if (wc->autocommit_blocks_set) 228048debafeSMikulas Patocka extra_args += 2; 228148debafeSMikulas Patocka if (wc->autocommit_time_set) 228248debafeSMikulas Patocka extra_args += 2; 228348debafeSMikulas Patocka if (wc->writeback_fua_set) 228448debafeSMikulas Patocka extra_args++; 228548debafeSMikulas Patocka 228648debafeSMikulas Patocka DMEMIT("%u", extra_args); 22879ff07e7dSMikulas Patocka if (wc->start_sector) 22889ff07e7dSMikulas Patocka DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); 228948debafeSMikulas Patocka if (wc->high_wm_percent_set) { 229048debafeSMikulas Patocka x = (uint64_t)wc->freelist_high_watermark * 100; 229148debafeSMikulas Patocka x += wc->n_blocks / 2; 229248debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 229348debafeSMikulas Patocka DMEMIT(" high_watermark %u", 100 - (unsigned)x); 229448debafeSMikulas Patocka } 229548debafeSMikulas Patocka if (wc->low_wm_percent_set) { 229648debafeSMikulas Patocka x = (uint64_t)wc->freelist_low_watermark * 100; 229748debafeSMikulas Patocka x += wc->n_blocks / 2; 229848debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 229948debafeSMikulas Patocka DMEMIT(" low_watermark %u", 100 - (unsigned)x); 230048debafeSMikulas Patocka } 230148debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 230248debafeSMikulas Patocka DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); 230348debafeSMikulas Patocka if 
(wc->autocommit_blocks_set) 230448debafeSMikulas Patocka DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); 230548debafeSMikulas Patocka if (wc->autocommit_time_set) 230648debafeSMikulas Patocka DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); 230748debafeSMikulas Patocka if (wc->writeback_fua_set) 230848debafeSMikulas Patocka DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); 230948debafeSMikulas Patocka break; 231048debafeSMikulas Patocka } 231148debafeSMikulas Patocka } 231248debafeSMikulas Patocka 231348debafeSMikulas Patocka static struct target_type writecache_target = { 231448debafeSMikulas Patocka .name = "writecache", 23159ff07e7dSMikulas Patocka .version = {1, 1, 1}, 231648debafeSMikulas Patocka .module = THIS_MODULE, 231748debafeSMikulas Patocka .ctr = writecache_ctr, 231848debafeSMikulas Patocka .dtr = writecache_dtr, 231948debafeSMikulas Patocka .status = writecache_status, 232048debafeSMikulas Patocka .postsuspend = writecache_suspend, 232148debafeSMikulas Patocka .resume = writecache_resume, 232248debafeSMikulas Patocka .message = writecache_message, 232348debafeSMikulas Patocka .map = writecache_map, 232448debafeSMikulas Patocka .end_io = writecache_end_io, 232548debafeSMikulas Patocka .iterate_devices = writecache_iterate_devices, 232648debafeSMikulas Patocka .io_hints = writecache_io_hints, 232748debafeSMikulas Patocka }; 232848debafeSMikulas Patocka 232948debafeSMikulas Patocka static int __init dm_writecache_init(void) 233048debafeSMikulas Patocka { 233148debafeSMikulas Patocka int r; 233248debafeSMikulas Patocka 233348debafeSMikulas Patocka r = dm_register_target(&writecache_target); 233448debafeSMikulas Patocka if (r < 0) { 233548debafeSMikulas Patocka DMERR("register failed %d", r); 233648debafeSMikulas Patocka return r; 233748debafeSMikulas Patocka } 233848debafeSMikulas Patocka 233948debafeSMikulas Patocka return 0; 234048debafeSMikulas Patocka } 234148debafeSMikulas Patocka 234248debafeSMikulas Patocka static 
void __exit dm_writecache_exit(void) 234348debafeSMikulas Patocka { 234448debafeSMikulas Patocka dm_unregister_target(&writecache_target); 234548debafeSMikulas Patocka } 234648debafeSMikulas Patocka 234748debafeSMikulas Patocka module_init(dm_writecache_init); 234848debafeSMikulas Patocka module_exit(dm_writecache_exit); 234948debafeSMikulas Patocka 235048debafeSMikulas Patocka MODULE_DESCRIPTION(DM_NAME " writecache target"); 235148debafeSMikulas Patocka MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); 235248debafeSMikulas Patocka MODULE_LICENSE("GPL"); 2353