148debafeSMikulas Patocka // SPDX-License-Identifier: GPL-2.0 248debafeSMikulas Patocka /* 348debafeSMikulas Patocka * Copyright (C) 2018 Red Hat. All rights reserved. 448debafeSMikulas Patocka * 548debafeSMikulas Patocka * This file is released under the GPL. 648debafeSMikulas Patocka */ 748debafeSMikulas Patocka 848debafeSMikulas Patocka #include <linux/device-mapper.h> 948debafeSMikulas Patocka #include <linux/module.h> 1048debafeSMikulas Patocka #include <linux/init.h> 1148debafeSMikulas Patocka #include <linux/vmalloc.h> 1248debafeSMikulas Patocka #include <linux/kthread.h> 1348debafeSMikulas Patocka #include <linux/dm-io.h> 1448debafeSMikulas Patocka #include <linux/dm-kcopyd.h> 1548debafeSMikulas Patocka #include <linux/dax.h> 1648debafeSMikulas Patocka #include <linux/pfn_t.h> 1748debafeSMikulas Patocka #include <linux/libnvdimm.h> 1848debafeSMikulas Patocka 1948debafeSMikulas Patocka #define DM_MSG_PREFIX "writecache" 2048debafeSMikulas Patocka 2148debafeSMikulas Patocka #define HIGH_WATERMARK 50 2248debafeSMikulas Patocka #define LOW_WATERMARK 45 2348debafeSMikulas Patocka #define MAX_WRITEBACK_JOBS 0 2448debafeSMikulas Patocka #define ENDIO_LATENCY 16 2548debafeSMikulas Patocka #define WRITEBACK_LATENCY 64 2648debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_SSD 65536 2748debafeSMikulas Patocka #define AUTOCOMMIT_BLOCKS_PMEM 64 2848debafeSMikulas Patocka #define AUTOCOMMIT_MSEC 1000 293923d485SMikulas Patocka #define MAX_AGE_DIV 16 303923d485SMikulas Patocka #define MAX_AGE_UNSPECIFIED -1UL 3148debafeSMikulas Patocka 3248debafeSMikulas Patocka #define BITMAP_GRANULARITY 65536 3348debafeSMikulas Patocka #if BITMAP_GRANULARITY < PAGE_SIZE 3448debafeSMikulas Patocka #undef BITMAP_GRANULARITY 3548debafeSMikulas Patocka #define BITMAP_GRANULARITY PAGE_SIZE 3648debafeSMikulas Patocka #endif 3748debafeSMikulas Patocka 3848debafeSMikulas Patocka #if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER) 3948debafeSMikulas Patocka #define 
DM_WRITECACHE_HAS_PMEM 4048debafeSMikulas Patocka #endif 4148debafeSMikulas Patocka 4248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 4348debafeSMikulas Patocka #define pmem_assign(dest, src) \ 4448debafeSMikulas Patocka do { \ 4548debafeSMikulas Patocka typeof(dest) uniq = (src); \ 4648debafeSMikulas Patocka memcpy_flushcache(&(dest), &uniq, sizeof(dest)); \ 4748debafeSMikulas Patocka } while (0) 4848debafeSMikulas Patocka #else 4948debafeSMikulas Patocka #define pmem_assign(dest, src) ((dest) = (src)) 5048debafeSMikulas Patocka #endif 5148debafeSMikulas Patocka 5248debafeSMikulas Patocka #if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) 5348debafeSMikulas Patocka #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 5448debafeSMikulas Patocka #endif 5548debafeSMikulas Patocka 5648debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_MAGIC 0x23489321 5748debafeSMikulas Patocka #define MEMORY_SUPERBLOCK_VERSION 1 5848debafeSMikulas Patocka 5948debafeSMikulas Patocka struct wc_memory_entry { 6048debafeSMikulas Patocka __le64 original_sector; 6148debafeSMikulas Patocka __le64 seq_count; 6248debafeSMikulas Patocka }; 6348debafeSMikulas Patocka 6448debafeSMikulas Patocka struct wc_memory_superblock { 6548debafeSMikulas Patocka union { 6648debafeSMikulas Patocka struct { 6748debafeSMikulas Patocka __le32 magic; 6848debafeSMikulas Patocka __le32 version; 6948debafeSMikulas Patocka __le32 block_size; 7048debafeSMikulas Patocka __le32 pad; 7148debafeSMikulas Patocka __le64 n_blocks; 7248debafeSMikulas Patocka __le64 seq_count; 7348debafeSMikulas Patocka }; 7448debafeSMikulas Patocka __le64 padding[8]; 7548debafeSMikulas Patocka }; 7648debafeSMikulas Patocka struct wc_memory_entry entries[0]; 7748debafeSMikulas Patocka }; 7848debafeSMikulas Patocka 7948debafeSMikulas Patocka struct wc_entry { 8048debafeSMikulas Patocka struct rb_node rb_node; 8148debafeSMikulas Patocka struct list_head lru; 8248debafeSMikulas Patocka unsigned short wc_list_contiguous; 
8348debafeSMikulas Patocka bool write_in_progress 8448debafeSMikulas Patocka #if BITS_PER_LONG == 64 8548debafeSMikulas Patocka :1 8648debafeSMikulas Patocka #endif 8748debafeSMikulas Patocka ; 8848debafeSMikulas Patocka unsigned long index 8948debafeSMikulas Patocka #if BITS_PER_LONG == 64 9048debafeSMikulas Patocka :47 9148debafeSMikulas Patocka #endif 9248debafeSMikulas Patocka ; 933923d485SMikulas Patocka unsigned long age; 9448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 9548debafeSMikulas Patocka uint64_t original_sector; 9648debafeSMikulas Patocka uint64_t seq_count; 9748debafeSMikulas Patocka #endif 9848debafeSMikulas Patocka }; 9948debafeSMikulas Patocka 10048debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 10148debafeSMikulas Patocka #define WC_MODE_PMEM(wc) ((wc)->pmem_mode) 10248debafeSMikulas Patocka #define WC_MODE_FUA(wc) ((wc)->writeback_fua) 10348debafeSMikulas Patocka #else 10448debafeSMikulas Patocka #define WC_MODE_PMEM(wc) false 10548debafeSMikulas Patocka #define WC_MODE_FUA(wc) false 10648debafeSMikulas Patocka #endif 10748debafeSMikulas Patocka #define WC_MODE_SORT_FREELIST(wc) (!WC_MODE_PMEM(wc)) 10848debafeSMikulas Patocka 10948debafeSMikulas Patocka struct dm_writecache { 11048debafeSMikulas Patocka struct mutex lock; 11148debafeSMikulas Patocka struct list_head lru; 11248debafeSMikulas Patocka union { 11348debafeSMikulas Patocka struct list_head freelist; 11448debafeSMikulas Patocka struct { 11548debafeSMikulas Patocka struct rb_root freetree; 11648debafeSMikulas Patocka struct wc_entry *current_free; 11748debafeSMikulas Patocka }; 11848debafeSMikulas Patocka }; 11948debafeSMikulas Patocka struct rb_root tree; 12048debafeSMikulas Patocka 12148debafeSMikulas Patocka size_t freelist_size; 12248debafeSMikulas Patocka size_t writeback_size; 12348debafeSMikulas Patocka size_t freelist_high_watermark; 12448debafeSMikulas Patocka size_t freelist_low_watermark; 1253923d485SMikulas Patocka unsigned long max_age; 
12648debafeSMikulas Patocka 12748debafeSMikulas Patocka unsigned uncommitted_blocks; 12848debafeSMikulas Patocka unsigned autocommit_blocks; 12948debafeSMikulas Patocka unsigned max_writeback_jobs; 13048debafeSMikulas Patocka 13148debafeSMikulas Patocka int error; 13248debafeSMikulas Patocka 13348debafeSMikulas Patocka unsigned long autocommit_jiffies; 13448debafeSMikulas Patocka struct timer_list autocommit_timer; 13548debafeSMikulas Patocka struct wait_queue_head freelist_wait; 13648debafeSMikulas Patocka 1373923d485SMikulas Patocka struct timer_list max_age_timer; 1383923d485SMikulas Patocka 13948debafeSMikulas Patocka atomic_t bio_in_progress[2]; 14048debafeSMikulas Patocka struct wait_queue_head bio_in_progress_wait[2]; 14148debafeSMikulas Patocka 14248debafeSMikulas Patocka struct dm_target *ti; 14348debafeSMikulas Patocka struct dm_dev *dev; 14448debafeSMikulas Patocka struct dm_dev *ssd_dev; 145d284f824SMikulas Patocka sector_t start_sector; 14648debafeSMikulas Patocka void *memory_map; 14748debafeSMikulas Patocka uint64_t memory_map_size; 14848debafeSMikulas Patocka size_t metadata_sectors; 14948debafeSMikulas Patocka size_t n_blocks; 15048debafeSMikulas Patocka uint64_t seq_count; 15148debafeSMikulas Patocka void *block_start; 15248debafeSMikulas Patocka struct wc_entry *entries; 15348debafeSMikulas Patocka unsigned block_size; 15448debafeSMikulas Patocka unsigned char block_size_bits; 15548debafeSMikulas Patocka 15648debafeSMikulas Patocka bool pmem_mode:1; 15748debafeSMikulas Patocka bool writeback_fua:1; 15848debafeSMikulas Patocka 15948debafeSMikulas Patocka bool overwrote_committed:1; 16048debafeSMikulas Patocka bool memory_vmapped:1; 16148debafeSMikulas Patocka 16248debafeSMikulas Patocka bool high_wm_percent_set:1; 16348debafeSMikulas Patocka bool low_wm_percent_set:1; 16448debafeSMikulas Patocka bool max_writeback_jobs_set:1; 16548debafeSMikulas Patocka bool autocommit_blocks_set:1; 16648debafeSMikulas Patocka bool autocommit_time_set:1; 
16748debafeSMikulas Patocka bool writeback_fua_set:1; 16848debafeSMikulas Patocka bool flush_on_suspend:1; 16993de44ebSMikulas Patocka bool cleaner:1; 17048debafeSMikulas Patocka 17148debafeSMikulas Patocka unsigned writeback_all; 17248debafeSMikulas Patocka struct workqueue_struct *writeback_wq; 17348debafeSMikulas Patocka struct work_struct writeback_work; 17448debafeSMikulas Patocka struct work_struct flush_work; 17548debafeSMikulas Patocka 17648debafeSMikulas Patocka struct dm_io_client *dm_io; 17748debafeSMikulas Patocka 17848debafeSMikulas Patocka raw_spinlock_t endio_list_lock; 17948debafeSMikulas Patocka struct list_head endio_list; 18048debafeSMikulas Patocka struct task_struct *endio_thread; 18148debafeSMikulas Patocka 18248debafeSMikulas Patocka struct task_struct *flush_thread; 18348debafeSMikulas Patocka struct bio_list flush_list; 18448debafeSMikulas Patocka 18548debafeSMikulas Patocka struct dm_kcopyd_client *dm_kcopyd; 18648debafeSMikulas Patocka unsigned long *dirty_bitmap; 18748debafeSMikulas Patocka unsigned dirty_bitmap_size; 18848debafeSMikulas Patocka 18948debafeSMikulas Patocka struct bio_set bio_set; 19048debafeSMikulas Patocka mempool_t copy_pool; 19148debafeSMikulas Patocka }; 19248debafeSMikulas Patocka 19348debafeSMikulas Patocka #define WB_LIST_INLINE 16 19448debafeSMikulas Patocka 19548debafeSMikulas Patocka struct writeback_struct { 19648debafeSMikulas Patocka struct list_head endio_entry; 19748debafeSMikulas Patocka struct dm_writecache *wc; 19848debafeSMikulas Patocka struct wc_entry **wc_list; 19948debafeSMikulas Patocka unsigned wc_list_n; 20048debafeSMikulas Patocka struct wc_entry *wc_list_inline[WB_LIST_INLINE]; 20148debafeSMikulas Patocka struct bio bio; 20248debafeSMikulas Patocka }; 20348debafeSMikulas Patocka 20448debafeSMikulas Patocka struct copy_struct { 20548debafeSMikulas Patocka struct list_head endio_entry; 20648debafeSMikulas Patocka struct dm_writecache *wc; 20748debafeSMikulas Patocka struct wc_entry *e; 
20848debafeSMikulas Patocka unsigned n_entries; 20948debafeSMikulas Patocka int error; 21048debafeSMikulas Patocka }; 21148debafeSMikulas Patocka 21248debafeSMikulas Patocka DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle, 21348debafeSMikulas Patocka "A percentage of time allocated for data copying"); 21448debafeSMikulas Patocka 21548debafeSMikulas Patocka static void wc_lock(struct dm_writecache *wc) 21648debafeSMikulas Patocka { 21748debafeSMikulas Patocka mutex_lock(&wc->lock); 21848debafeSMikulas Patocka } 21948debafeSMikulas Patocka 22048debafeSMikulas Patocka static void wc_unlock(struct dm_writecache *wc) 22148debafeSMikulas Patocka { 22248debafeSMikulas Patocka mutex_unlock(&wc->lock); 22348debafeSMikulas Patocka } 22448debafeSMikulas Patocka 22548debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 22648debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 22748debafeSMikulas Patocka { 22848debafeSMikulas Patocka int r; 22948debafeSMikulas Patocka loff_t s; 23048debafeSMikulas Patocka long p, da; 23148debafeSMikulas Patocka pfn_t pfn; 23248debafeSMikulas Patocka int id; 23348debafeSMikulas Patocka struct page **pages; 23448debafeSMikulas Patocka 23548debafeSMikulas Patocka wc->memory_vmapped = false; 23648debafeSMikulas Patocka 23748debafeSMikulas Patocka if (!wc->ssd_dev->dax_dev) { 23848debafeSMikulas Patocka r = -EOPNOTSUPP; 23948debafeSMikulas Patocka goto err1; 24048debafeSMikulas Patocka } 24148debafeSMikulas Patocka s = wc->memory_map_size; 24248debafeSMikulas Patocka p = s >> PAGE_SHIFT; 24348debafeSMikulas Patocka if (!p) { 24448debafeSMikulas Patocka r = -EINVAL; 24548debafeSMikulas Patocka goto err1; 24648debafeSMikulas Patocka } 24748debafeSMikulas Patocka if (p != s >> PAGE_SHIFT) { 24848debafeSMikulas Patocka r = -EOVERFLOW; 24948debafeSMikulas Patocka goto err1; 25048debafeSMikulas Patocka } 25148debafeSMikulas Patocka 25248debafeSMikulas Patocka id = dax_read_lock(); 25348debafeSMikulas 
Patocka 25448debafeSMikulas Patocka da = dax_direct_access(wc->ssd_dev->dax_dev, 0, p, &wc->memory_map, &pfn); 25548debafeSMikulas Patocka if (da < 0) { 25648debafeSMikulas Patocka wc->memory_map = NULL; 25748debafeSMikulas Patocka r = da; 25848debafeSMikulas Patocka goto err2; 25948debafeSMikulas Patocka } 26048debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 26148debafeSMikulas Patocka wc->memory_map = NULL; 26248debafeSMikulas Patocka r = -EOPNOTSUPP; 26348debafeSMikulas Patocka goto err2; 26448debafeSMikulas Patocka } 26548debafeSMikulas Patocka if (da != p) { 26648debafeSMikulas Patocka long i; 26748debafeSMikulas Patocka wc->memory_map = NULL; 26850a7d3baSKees Cook pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); 26948debafeSMikulas Patocka if (!pages) { 27048debafeSMikulas Patocka r = -ENOMEM; 27148debafeSMikulas Patocka goto err2; 27248debafeSMikulas Patocka } 27348debafeSMikulas Patocka i = 0; 27448debafeSMikulas Patocka do { 27548debafeSMikulas Patocka long daa; 27648debafeSMikulas Patocka daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i, 277f742267aSHuaisheng Ye NULL, &pfn); 27848debafeSMikulas Patocka if (daa <= 0) { 27948debafeSMikulas Patocka r = daa ? 
daa : -EINVAL; 28048debafeSMikulas Patocka goto err3; 28148debafeSMikulas Patocka } 28248debafeSMikulas Patocka if (!pfn_t_has_page(pfn)) { 28348debafeSMikulas Patocka r = -EOPNOTSUPP; 28448debafeSMikulas Patocka goto err3; 28548debafeSMikulas Patocka } 28648debafeSMikulas Patocka while (daa-- && i < p) { 28748debafeSMikulas Patocka pages[i++] = pfn_t_to_page(pfn); 28848debafeSMikulas Patocka pfn.val++; 28948debafeSMikulas Patocka } 29048debafeSMikulas Patocka } while (i < p); 29148debafeSMikulas Patocka wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); 29248debafeSMikulas Patocka if (!wc->memory_map) { 29348debafeSMikulas Patocka r = -ENOMEM; 29448debafeSMikulas Patocka goto err3; 29548debafeSMikulas Patocka } 29648debafeSMikulas Patocka kvfree(pages); 29748debafeSMikulas Patocka wc->memory_vmapped = true; 29848debafeSMikulas Patocka } 29948debafeSMikulas Patocka 30048debafeSMikulas Patocka dax_read_unlock(id); 301d284f824SMikulas Patocka 302d284f824SMikulas Patocka wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT; 303d284f824SMikulas Patocka wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT; 304d284f824SMikulas Patocka 30548debafeSMikulas Patocka return 0; 30648debafeSMikulas Patocka err3: 30748debafeSMikulas Patocka kvfree(pages); 30848debafeSMikulas Patocka err2: 30948debafeSMikulas Patocka dax_read_unlock(id); 31048debafeSMikulas Patocka err1: 31148debafeSMikulas Patocka return r; 31248debafeSMikulas Patocka } 31348debafeSMikulas Patocka #else 31448debafeSMikulas Patocka static int persistent_memory_claim(struct dm_writecache *wc) 31548debafeSMikulas Patocka { 31648debafeSMikulas Patocka BUG(); 31748debafeSMikulas Patocka } 31848debafeSMikulas Patocka #endif 31948debafeSMikulas Patocka 32048debafeSMikulas Patocka static void persistent_memory_release(struct dm_writecache *wc) 32148debafeSMikulas Patocka { 32248debafeSMikulas Patocka if (wc->memory_vmapped) 323d284f824SMikulas Patocka vunmap(wc->memory_map - ((size_t)wc->start_sector 
<< SECTOR_SHIFT)); 32448debafeSMikulas Patocka } 32548debafeSMikulas Patocka 32648debafeSMikulas Patocka static struct page *persistent_memory_page(void *addr) 32748debafeSMikulas Patocka { 32848debafeSMikulas Patocka if (is_vmalloc_addr(addr)) 32948debafeSMikulas Patocka return vmalloc_to_page(addr); 33048debafeSMikulas Patocka else 33148debafeSMikulas Patocka return virt_to_page(addr); 33248debafeSMikulas Patocka } 33348debafeSMikulas Patocka 33448debafeSMikulas Patocka static unsigned persistent_memory_page_offset(void *addr) 33548debafeSMikulas Patocka { 33648debafeSMikulas Patocka return (unsigned long)addr & (PAGE_SIZE - 1); 33748debafeSMikulas Patocka } 33848debafeSMikulas Patocka 33948debafeSMikulas Patocka static void persistent_memory_flush_cache(void *ptr, size_t size) 34048debafeSMikulas Patocka { 34148debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 34248debafeSMikulas Patocka flush_kernel_vmap_range(ptr, size); 34348debafeSMikulas Patocka } 34448debafeSMikulas Patocka 34548debafeSMikulas Patocka static void persistent_memory_invalidate_cache(void *ptr, size_t size) 34648debafeSMikulas Patocka { 34748debafeSMikulas Patocka if (is_vmalloc_addr(ptr)) 34848debafeSMikulas Patocka invalidate_kernel_vmap_range(ptr, size); 34948debafeSMikulas Patocka } 35048debafeSMikulas Patocka 35148debafeSMikulas Patocka static struct wc_memory_superblock *sb(struct dm_writecache *wc) 35248debafeSMikulas Patocka { 35348debafeSMikulas Patocka return wc->memory_map; 35448debafeSMikulas Patocka } 35548debafeSMikulas Patocka 35648debafeSMikulas Patocka static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e) 35748debafeSMikulas Patocka { 35848debafeSMikulas Patocka return &sb(wc)->entries[e->index]; 35948debafeSMikulas Patocka } 36048debafeSMikulas Patocka 36148debafeSMikulas Patocka static void *memory_data(struct dm_writecache *wc, struct wc_entry *e) 36248debafeSMikulas Patocka { 36348debafeSMikulas Patocka return (char *)wc->block_start + 
(e->index << wc->block_size_bits); 36448debafeSMikulas Patocka } 36548debafeSMikulas Patocka 36648debafeSMikulas Patocka static sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e) 36748debafeSMikulas Patocka { 368d284f824SMikulas Patocka return wc->start_sector + wc->metadata_sectors + 36948debafeSMikulas Patocka ((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT)); 37048debafeSMikulas Patocka } 37148debafeSMikulas Patocka 37248debafeSMikulas Patocka static uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e) 37348debafeSMikulas Patocka { 37448debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 37548debafeSMikulas Patocka return e->original_sector; 37648debafeSMikulas Patocka #else 37748debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->original_sector); 37848debafeSMikulas Patocka #endif 37948debafeSMikulas Patocka } 38048debafeSMikulas Patocka 38148debafeSMikulas Patocka static uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e) 38248debafeSMikulas Patocka { 38348debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 38448debafeSMikulas Patocka return e->seq_count; 38548debafeSMikulas Patocka #else 38648debafeSMikulas Patocka return le64_to_cpu(memory_entry(wc, e)->seq_count); 38748debafeSMikulas Patocka #endif 38848debafeSMikulas Patocka } 38948debafeSMikulas Patocka 39048debafeSMikulas Patocka static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e) 39148debafeSMikulas Patocka { 39248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 39348debafeSMikulas Patocka e->seq_count = -1; 39448debafeSMikulas Patocka #endif 39548debafeSMikulas Patocka pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1)); 39648debafeSMikulas Patocka } 39748debafeSMikulas Patocka 39848debafeSMikulas Patocka static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e, 39948debafeSMikulas Patocka uint64_t 
original_sector, uint64_t seq_count) 40048debafeSMikulas Patocka { 40148debafeSMikulas Patocka struct wc_memory_entry me; 40248debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 40348debafeSMikulas Patocka e->original_sector = original_sector; 40448debafeSMikulas Patocka e->seq_count = seq_count; 40548debafeSMikulas Patocka #endif 40648debafeSMikulas Patocka me.original_sector = cpu_to_le64(original_sector); 40748debafeSMikulas Patocka me.seq_count = cpu_to_le64(seq_count); 40848debafeSMikulas Patocka pmem_assign(*memory_entry(wc, e), me); 40948debafeSMikulas Patocka } 41048debafeSMikulas Patocka 41148debafeSMikulas Patocka #define writecache_error(wc, err, msg, arg...) \ 41248debafeSMikulas Patocka do { \ 41348debafeSMikulas Patocka if (!cmpxchg(&(wc)->error, 0, err)) \ 41448debafeSMikulas Patocka DMERR(msg, ##arg); \ 41548debafeSMikulas Patocka wake_up(&(wc)->freelist_wait); \ 41648debafeSMikulas Patocka } while (0) 41748debafeSMikulas Patocka 41848debafeSMikulas Patocka #define writecache_has_error(wc) (unlikely(READ_ONCE((wc)->error))) 41948debafeSMikulas Patocka 42048debafeSMikulas Patocka static void writecache_flush_all_metadata(struct dm_writecache *wc) 42148debafeSMikulas Patocka { 42248debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42348debafeSMikulas Patocka memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size); 42448debafeSMikulas Patocka } 42548debafeSMikulas Patocka 42648debafeSMikulas Patocka static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size) 42748debafeSMikulas Patocka { 42848debafeSMikulas Patocka if (!WC_MODE_PMEM(wc)) 42948debafeSMikulas Patocka __set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY, 43048debafeSMikulas Patocka wc->dirty_bitmap); 43148debafeSMikulas Patocka } 43248debafeSMikulas Patocka 43348debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev); 43448debafeSMikulas Patocka 43548debafeSMikulas Patocka struct io_notify { 
43648debafeSMikulas Patocka struct dm_writecache *wc; 43748debafeSMikulas Patocka struct completion c; 43848debafeSMikulas Patocka atomic_t count; 43948debafeSMikulas Patocka }; 44048debafeSMikulas Patocka 44148debafeSMikulas Patocka static void writecache_notify_io(unsigned long error, void *context) 44248debafeSMikulas Patocka { 44348debafeSMikulas Patocka struct io_notify *endio = context; 44448debafeSMikulas Patocka 44548debafeSMikulas Patocka if (unlikely(error != 0)) 44648debafeSMikulas Patocka writecache_error(endio->wc, -EIO, "error writing metadata"); 44748debafeSMikulas Patocka BUG_ON(atomic_read(&endio->count) <= 0); 44848debafeSMikulas Patocka if (atomic_dec_and_test(&endio->count)) 44948debafeSMikulas Patocka complete(&endio->c); 45048debafeSMikulas Patocka } 45148debafeSMikulas Patocka 452aa950920SMikulas Patocka static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) 453aa950920SMikulas Patocka { 454aa950920SMikulas Patocka wait_event(wc->bio_in_progress_wait[direction], 455aa950920SMikulas Patocka !atomic_read(&wc->bio_in_progress[direction])); 456aa950920SMikulas Patocka } 457aa950920SMikulas Patocka 458aa950920SMikulas Patocka static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 45948debafeSMikulas Patocka { 46048debafeSMikulas Patocka struct dm_io_region region; 46148debafeSMikulas Patocka struct dm_io_request req; 46248debafeSMikulas Patocka struct io_notify endio = { 46348debafeSMikulas Patocka wc, 46448debafeSMikulas Patocka COMPLETION_INITIALIZER_ONSTACK(endio.c), 46548debafeSMikulas Patocka ATOMIC_INIT(1), 46648debafeSMikulas Patocka }; 4671e1132eaSMikulas Patocka unsigned bitmap_bits = wc->dirty_bitmap_size * 8; 46848debafeSMikulas Patocka unsigned i = 0; 46948debafeSMikulas Patocka 47048debafeSMikulas Patocka while (1) { 47148debafeSMikulas Patocka unsigned j; 47248debafeSMikulas Patocka i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i); 47348debafeSMikulas Patocka if (unlikely(i == 
bitmap_bits)) 47448debafeSMikulas Patocka break; 47548debafeSMikulas Patocka j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i); 47648debafeSMikulas Patocka 47748debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 47848debafeSMikulas Patocka region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 47948debafeSMikulas Patocka region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 48048debafeSMikulas Patocka 48148debafeSMikulas Patocka if (unlikely(region.sector >= wc->metadata_sectors)) 48248debafeSMikulas Patocka break; 48348debafeSMikulas Patocka if (unlikely(region.sector + region.count > wc->metadata_sectors)) 48448debafeSMikulas Patocka region.count = wc->metadata_sectors - region.sector; 48548debafeSMikulas Patocka 486d284f824SMikulas Patocka region.sector += wc->start_sector; 48748debafeSMikulas Patocka atomic_inc(&endio.count); 48848debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 48948debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 49048debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 49148debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; 49248debafeSMikulas Patocka req.client = wc->dm_io; 49348debafeSMikulas Patocka req.notify.fn = writecache_notify_io; 49448debafeSMikulas Patocka req.notify.context = &endio; 49548debafeSMikulas Patocka 49648debafeSMikulas Patocka /* writing via async dm-io (implied by notify.fn above) won't return an error */ 49748debafeSMikulas Patocka (void) dm_io(&req, 1, ®ion, NULL); 49848debafeSMikulas Patocka i = j; 49948debafeSMikulas Patocka } 50048debafeSMikulas Patocka 50148debafeSMikulas Patocka writecache_notify_io(0, &endio); 50248debafeSMikulas Patocka wait_for_completion_io(&endio.c); 50348debafeSMikulas Patocka 504aa950920SMikulas Patocka if (wait_for_ios) 505aa950920SMikulas Patocka writecache_wait_for_ios(wc, WRITE); 506aa950920SMikulas Patocka 50748debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 50848debafeSMikulas 
Patocka 50948debafeSMikulas Patocka memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); 51048debafeSMikulas Patocka } 51148debafeSMikulas Patocka 512*dc8a01aeSMikulas Patocka static void ssd_commit_superblock(struct dm_writecache *wc) 513*dc8a01aeSMikulas Patocka { 514*dc8a01aeSMikulas Patocka int r; 515*dc8a01aeSMikulas Patocka struct dm_io_region region; 516*dc8a01aeSMikulas Patocka struct dm_io_request req; 517*dc8a01aeSMikulas Patocka 518*dc8a01aeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 519*dc8a01aeSMikulas Patocka region.sector = 0; 520*dc8a01aeSMikulas Patocka region.count = PAGE_SIZE; 521*dc8a01aeSMikulas Patocka 522*dc8a01aeSMikulas Patocka if (unlikely(region.sector + region.count > wc->metadata_sectors)) 523*dc8a01aeSMikulas Patocka region.count = wc->metadata_sectors - region.sector; 524*dc8a01aeSMikulas Patocka 525*dc8a01aeSMikulas Patocka region.sector += wc->start_sector; 526*dc8a01aeSMikulas Patocka 527*dc8a01aeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 528*dc8a01aeSMikulas Patocka req.bi_op_flags = REQ_SYNC | REQ_FUA; 529*dc8a01aeSMikulas Patocka req.mem.type = DM_IO_VMA; 530*dc8a01aeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map; 531*dc8a01aeSMikulas Patocka req.client = wc->dm_io; 532*dc8a01aeSMikulas Patocka req.notify.fn = NULL; 533*dc8a01aeSMikulas Patocka req.notify.context = NULL; 534*dc8a01aeSMikulas Patocka 535*dc8a01aeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 536*dc8a01aeSMikulas Patocka if (unlikely(r)) 537*dc8a01aeSMikulas Patocka writecache_error(wc, r, "error writing superblock"); 538*dc8a01aeSMikulas Patocka } 539*dc8a01aeSMikulas Patocka 540aa950920SMikulas Patocka static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 54148debafeSMikulas Patocka { 54248debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 54348debafeSMikulas Patocka wmb(); 54448debafeSMikulas Patocka else 545aa950920SMikulas Patocka ssd_commit_flushed(wc, wait_for_ios); 54648debafeSMikulas Patocka } 54748debafeSMikulas 
Patocka 54848debafeSMikulas Patocka static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) 54948debafeSMikulas Patocka { 55048debafeSMikulas Patocka int r; 55148debafeSMikulas Patocka struct dm_io_region region; 55248debafeSMikulas Patocka struct dm_io_request req; 55348debafeSMikulas Patocka 55448debafeSMikulas Patocka region.bdev = dev->bdev; 55548debafeSMikulas Patocka region.sector = 0; 55648debafeSMikulas Patocka region.count = 0; 55748debafeSMikulas Patocka req.bi_op = REQ_OP_WRITE; 55848debafeSMikulas Patocka req.bi_op_flags = REQ_PREFLUSH; 55948debafeSMikulas Patocka req.mem.type = DM_IO_KMEM; 56048debafeSMikulas Patocka req.mem.ptr.addr = NULL; 56148debafeSMikulas Patocka req.client = wc->dm_io; 56248debafeSMikulas Patocka req.notify.fn = NULL; 56348debafeSMikulas Patocka 56448debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 56548debafeSMikulas Patocka if (unlikely(r)) 56648debafeSMikulas Patocka writecache_error(wc, r, "error flushing metadata: %d", r); 56748debafeSMikulas Patocka } 56848debafeSMikulas Patocka 56948debafeSMikulas Patocka #define WFE_RETURN_FOLLOWING 1 57048debafeSMikulas Patocka #define WFE_LOWEST_SEQ 2 57148debafeSMikulas Patocka 57248debafeSMikulas Patocka static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, 57348debafeSMikulas Patocka uint64_t block, int flags) 57448debafeSMikulas Patocka { 57548debafeSMikulas Patocka struct wc_entry *e; 57648debafeSMikulas Patocka struct rb_node *node = wc->tree.rb_node; 57748debafeSMikulas Patocka 57848debafeSMikulas Patocka if (unlikely(!node)) 57948debafeSMikulas Patocka return NULL; 58048debafeSMikulas Patocka 58148debafeSMikulas Patocka while (1) { 58248debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 58348debafeSMikulas Patocka if (read_original_sector(wc, e) == block) 58448debafeSMikulas Patocka break; 585f8011d33SMikulas Patocka 58648debafeSMikulas Patocka node = (read_original_sector(wc, e) >= block ? 
58748debafeSMikulas Patocka e->rb_node.rb_left : e->rb_node.rb_right); 58848debafeSMikulas Patocka if (unlikely(!node)) { 589f8011d33SMikulas Patocka if (!(flags & WFE_RETURN_FOLLOWING)) 59048debafeSMikulas Patocka return NULL; 59148debafeSMikulas Patocka if (read_original_sector(wc, e) >= block) { 592f8011d33SMikulas Patocka return e; 59348debafeSMikulas Patocka } else { 59448debafeSMikulas Patocka node = rb_next(&e->rb_node); 595f8011d33SMikulas Patocka if (unlikely(!node)) 59648debafeSMikulas Patocka return NULL; 59748debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 598f8011d33SMikulas Patocka return e; 59948debafeSMikulas Patocka } 60048debafeSMikulas Patocka } 60148debafeSMikulas Patocka } 60248debafeSMikulas Patocka 60348debafeSMikulas Patocka while (1) { 60448debafeSMikulas Patocka struct wc_entry *e2; 60548debafeSMikulas Patocka if (flags & WFE_LOWEST_SEQ) 60648debafeSMikulas Patocka node = rb_prev(&e->rb_node); 60748debafeSMikulas Patocka else 60848debafeSMikulas Patocka node = rb_next(&e->rb_node); 60984420b1eSHuaisheng Ye if (unlikely(!node)) 61048debafeSMikulas Patocka return e; 61148debafeSMikulas Patocka e2 = container_of(node, struct wc_entry, rb_node); 61248debafeSMikulas Patocka if (read_original_sector(wc, e2) != block) 61348debafeSMikulas Patocka return e; 61448debafeSMikulas Patocka e = e2; 61548debafeSMikulas Patocka } 61648debafeSMikulas Patocka } 61748debafeSMikulas Patocka 61848debafeSMikulas Patocka static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins) 61948debafeSMikulas Patocka { 62048debafeSMikulas Patocka struct wc_entry *e; 62148debafeSMikulas Patocka struct rb_node **node = &wc->tree.rb_node, *parent = NULL; 62248debafeSMikulas Patocka 62348debafeSMikulas Patocka while (*node) { 62448debafeSMikulas Patocka e = container_of(*node, struct wc_entry, rb_node); 62548debafeSMikulas Patocka parent = &e->rb_node; 62648debafeSMikulas Patocka if (read_original_sector(wc, e) > 
read_original_sector(wc, ins)) 62748debafeSMikulas Patocka node = &parent->rb_left; 62848debafeSMikulas Patocka else 62948debafeSMikulas Patocka node = &parent->rb_right; 63048debafeSMikulas Patocka } 63148debafeSMikulas Patocka rb_link_node(&ins->rb_node, parent, node); 63248debafeSMikulas Patocka rb_insert_color(&ins->rb_node, &wc->tree); 63348debafeSMikulas Patocka list_add(&ins->lru, &wc->lru); 6343923d485SMikulas Patocka ins->age = jiffies; 63548debafeSMikulas Patocka } 63648debafeSMikulas Patocka 63748debafeSMikulas Patocka static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e) 63848debafeSMikulas Patocka { 63948debafeSMikulas Patocka list_del(&e->lru); 64048debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->tree); 64148debafeSMikulas Patocka } 64248debafeSMikulas Patocka 64348debafeSMikulas Patocka static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e) 64448debafeSMikulas Patocka { 64548debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 64648debafeSMikulas Patocka struct rb_node **node = &wc->freetree.rb_node, *parent = NULL; 64748debafeSMikulas Patocka if (unlikely(!*node)) 64848debafeSMikulas Patocka wc->current_free = e; 64948debafeSMikulas Patocka while (*node) { 65048debafeSMikulas Patocka parent = *node; 65148debafeSMikulas Patocka if (&e->rb_node < *node) 65248debafeSMikulas Patocka node = &parent->rb_left; 65348debafeSMikulas Patocka else 65448debafeSMikulas Patocka node = &parent->rb_right; 65548debafeSMikulas Patocka } 65648debafeSMikulas Patocka rb_link_node(&e->rb_node, parent, node); 65748debafeSMikulas Patocka rb_insert_color(&e->rb_node, &wc->freetree); 65848debafeSMikulas Patocka } else { 65948debafeSMikulas Patocka list_add_tail(&e->lru, &wc->freelist); 66048debafeSMikulas Patocka } 66148debafeSMikulas Patocka wc->freelist_size++; 66248debafeSMikulas Patocka } 66348debafeSMikulas Patocka 66441c526c5SMikulas Patocka static inline void writecache_verify_watermark(struct dm_writecache *wc) 
66541c526c5SMikulas Patocka { 66641c526c5SMikulas Patocka if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) 66741c526c5SMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 66841c526c5SMikulas Patocka } 66941c526c5SMikulas Patocka 6703923d485SMikulas Patocka static void writecache_max_age_timer(struct timer_list *t) 6713923d485SMikulas Patocka { 6723923d485SMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, max_age_timer); 6733923d485SMikulas Patocka 6743923d485SMikulas Patocka if (!dm_suspended(wc->ti) && !writecache_has_error(wc)) { 6753923d485SMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 6763923d485SMikulas Patocka mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 6773923d485SMikulas Patocka } 6783923d485SMikulas Patocka } 6793923d485SMikulas Patocka 680dcd19507SMikulas Patocka static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) 68148debafeSMikulas Patocka { 68248debafeSMikulas Patocka struct wc_entry *e; 68348debafeSMikulas Patocka 68448debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 68548debafeSMikulas Patocka struct rb_node *next; 68648debafeSMikulas Patocka if (unlikely(!wc->current_free)) 68748debafeSMikulas Patocka return NULL; 68848debafeSMikulas Patocka e = wc->current_free; 689dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 690dcd19507SMikulas Patocka return NULL; 69148debafeSMikulas Patocka next = rb_next(&e->rb_node); 69248debafeSMikulas Patocka rb_erase(&e->rb_node, &wc->freetree); 69348debafeSMikulas Patocka if (unlikely(!next)) 69448debafeSMikulas Patocka next = rb_first(&wc->freetree); 69548debafeSMikulas Patocka wc->current_free = next ? 
container_of(next, struct wc_entry, rb_node) : NULL; 69648debafeSMikulas Patocka } else { 69748debafeSMikulas Patocka if (unlikely(list_empty(&wc->freelist))) 69848debafeSMikulas Patocka return NULL; 69948debafeSMikulas Patocka e = container_of(wc->freelist.next, struct wc_entry, lru); 700dcd19507SMikulas Patocka if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 701dcd19507SMikulas Patocka return NULL; 70248debafeSMikulas Patocka list_del(&e->lru); 70348debafeSMikulas Patocka } 70448debafeSMikulas Patocka wc->freelist_size--; 70541c526c5SMikulas Patocka 70641c526c5SMikulas Patocka writecache_verify_watermark(wc); 70748debafeSMikulas Patocka 70848debafeSMikulas Patocka return e; 70948debafeSMikulas Patocka } 71048debafeSMikulas Patocka 71148debafeSMikulas Patocka static void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e) 71248debafeSMikulas Patocka { 71348debafeSMikulas Patocka writecache_unlink(wc, e); 71448debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 71548debafeSMikulas Patocka clear_seq_count(wc, e); 71648debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 71748debafeSMikulas Patocka if (unlikely(waitqueue_active(&wc->freelist_wait))) 71848debafeSMikulas Patocka wake_up(&wc->freelist_wait); 71948debafeSMikulas Patocka } 72048debafeSMikulas Patocka 72148debafeSMikulas Patocka static void writecache_wait_on_freelist(struct dm_writecache *wc) 72248debafeSMikulas Patocka { 72348debafeSMikulas Patocka DEFINE_WAIT(wait); 72448debafeSMikulas Patocka 72548debafeSMikulas Patocka prepare_to_wait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE); 72648debafeSMikulas Patocka wc_unlock(wc); 72748debafeSMikulas Patocka io_schedule(); 72848debafeSMikulas Patocka finish_wait(&wc->freelist_wait, &wait); 72948debafeSMikulas Patocka wc_lock(wc); 73048debafeSMikulas Patocka } 73148debafeSMikulas Patocka 73248debafeSMikulas Patocka static void 
writecache_poison_lists(struct dm_writecache *wc) 73348debafeSMikulas Patocka { 73448debafeSMikulas Patocka /* 73548debafeSMikulas Patocka * Catch incorrect access to these values while the device is suspended. 73648debafeSMikulas Patocka */ 73748debafeSMikulas Patocka memset(&wc->tree, -1, sizeof wc->tree); 73848debafeSMikulas Patocka wc->lru.next = LIST_POISON1; 73948debafeSMikulas Patocka wc->lru.prev = LIST_POISON2; 74048debafeSMikulas Patocka wc->freelist.next = LIST_POISON1; 74148debafeSMikulas Patocka wc->freelist.prev = LIST_POISON2; 74248debafeSMikulas Patocka } 74348debafeSMikulas Patocka 74448debafeSMikulas Patocka static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e) 74548debafeSMikulas Patocka { 74648debafeSMikulas Patocka writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 74748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 74848debafeSMikulas Patocka writecache_flush_region(wc, memory_data(wc, e), wc->block_size); 74948debafeSMikulas Patocka } 75048debafeSMikulas Patocka 75148debafeSMikulas Patocka static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e) 75248debafeSMikulas Patocka { 75348debafeSMikulas Patocka return read_seq_count(wc, e) < wc->seq_count; 75448debafeSMikulas Patocka } 75548debafeSMikulas Patocka 75648debafeSMikulas Patocka static void writecache_flush(struct dm_writecache *wc) 75748debafeSMikulas Patocka { 75848debafeSMikulas Patocka struct wc_entry *e, *e2; 75948debafeSMikulas Patocka bool need_flush_after_free; 76048debafeSMikulas Patocka 76148debafeSMikulas Patocka wc->uncommitted_blocks = 0; 76248debafeSMikulas Patocka del_timer(&wc->autocommit_timer); 76348debafeSMikulas Patocka 76448debafeSMikulas Patocka if (list_empty(&wc->lru)) 76548debafeSMikulas Patocka return; 76648debafeSMikulas Patocka 76748debafeSMikulas Patocka e = container_of(wc->lru.next, struct wc_entry, lru); 76848debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e)) 
{ 76948debafeSMikulas Patocka if (wc->overwrote_committed) { 77048debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 77148debafeSMikulas Patocka writecache_disk_flush(wc, wc->ssd_dev); 77248debafeSMikulas Patocka wc->overwrote_committed = false; 77348debafeSMikulas Patocka } 77448debafeSMikulas Patocka return; 77548debafeSMikulas Patocka } 77648debafeSMikulas Patocka while (1) { 77748debafeSMikulas Patocka writecache_flush_entry(wc, e); 77848debafeSMikulas Patocka if (unlikely(e->lru.next == &wc->lru)) 77948debafeSMikulas Patocka break; 78048debafeSMikulas Patocka e2 = container_of(e->lru.next, struct wc_entry, lru); 78148debafeSMikulas Patocka if (writecache_entry_is_committed(wc, e2)) 78248debafeSMikulas Patocka break; 78348debafeSMikulas Patocka e = e2; 78448debafeSMikulas Patocka cond_resched(); 78548debafeSMikulas Patocka } 786aa950920SMikulas Patocka writecache_commit_flushed(wc, true); 78748debafeSMikulas Patocka 78848debafeSMikulas Patocka wc->seq_count++; 78948debafeSMikulas Patocka pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); 790*dc8a01aeSMikulas Patocka if (WC_MODE_PMEM(wc)) 791aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 792*dc8a01aeSMikulas Patocka else 793*dc8a01aeSMikulas Patocka ssd_commit_superblock(wc); 79448debafeSMikulas Patocka 79548debafeSMikulas Patocka wc->overwrote_committed = false; 79648debafeSMikulas Patocka 79748debafeSMikulas Patocka need_flush_after_free = false; 79848debafeSMikulas Patocka while (1) { 79948debafeSMikulas Patocka /* Free another committed entry with lower seq-count */ 80048debafeSMikulas Patocka struct rb_node *rb_node = rb_prev(&e->rb_node); 80148debafeSMikulas Patocka 80248debafeSMikulas Patocka if (rb_node) { 80348debafeSMikulas Patocka e2 = container_of(rb_node, struct wc_entry, rb_node); 80448debafeSMikulas Patocka if (read_original_sector(wc, e2) == read_original_sector(wc, e) && 80548debafeSMikulas Patocka likely(!e2->write_in_progress)) { 80648debafeSMikulas Patocka 
writecache_free_entry(wc, e2); 80748debafeSMikulas Patocka need_flush_after_free = true; 80848debafeSMikulas Patocka } 80948debafeSMikulas Patocka } 81048debafeSMikulas Patocka if (unlikely(e->lru.prev == &wc->lru)) 81148debafeSMikulas Patocka break; 81248debafeSMikulas Patocka e = container_of(e->lru.prev, struct wc_entry, lru); 81348debafeSMikulas Patocka cond_resched(); 81448debafeSMikulas Patocka } 81548debafeSMikulas Patocka 81648debafeSMikulas Patocka if (need_flush_after_free) 817aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 81848debafeSMikulas Patocka } 81948debafeSMikulas Patocka 82048debafeSMikulas Patocka static void writecache_flush_work(struct work_struct *work) 82148debafeSMikulas Patocka { 82248debafeSMikulas Patocka struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work); 82348debafeSMikulas Patocka 82448debafeSMikulas Patocka wc_lock(wc); 82548debafeSMikulas Patocka writecache_flush(wc); 82648debafeSMikulas Patocka wc_unlock(wc); 82748debafeSMikulas Patocka } 82848debafeSMikulas Patocka 82948debafeSMikulas Patocka static void writecache_autocommit_timer(struct timer_list *t) 83048debafeSMikulas Patocka { 83148debafeSMikulas Patocka struct dm_writecache *wc = from_timer(wc, t, autocommit_timer); 83248debafeSMikulas Patocka if (!writecache_has_error(wc)) 83348debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 83448debafeSMikulas Patocka } 83548debafeSMikulas Patocka 83648debafeSMikulas Patocka static void writecache_schedule_autocommit(struct dm_writecache *wc) 83748debafeSMikulas Patocka { 83848debafeSMikulas Patocka if (!timer_pending(&wc->autocommit_timer)) 83948debafeSMikulas Patocka mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies); 84048debafeSMikulas Patocka } 84148debafeSMikulas Patocka 84248debafeSMikulas Patocka static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end) 84348debafeSMikulas Patocka { 84448debafeSMikulas Patocka struct 
wc_entry *e; 84548debafeSMikulas Patocka bool discarded_something = false; 84648debafeSMikulas Patocka 84748debafeSMikulas Patocka e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ); 84848debafeSMikulas Patocka if (unlikely(!e)) 84948debafeSMikulas Patocka return; 85048debafeSMikulas Patocka 85148debafeSMikulas Patocka while (read_original_sector(wc, e) < end) { 85248debafeSMikulas Patocka struct rb_node *node = rb_next(&e->rb_node); 85348debafeSMikulas Patocka 85448debafeSMikulas Patocka if (likely(!e->write_in_progress)) { 85548debafeSMikulas Patocka if (!discarded_something) { 85648debafeSMikulas Patocka writecache_wait_for_ios(wc, READ); 85748debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 85848debafeSMikulas Patocka discarded_something = true; 85948debafeSMikulas Patocka } 86048debafeSMikulas Patocka writecache_free_entry(wc, e); 86148debafeSMikulas Patocka } 86248debafeSMikulas Patocka 86384420b1eSHuaisheng Ye if (unlikely(!node)) 86448debafeSMikulas Patocka break; 86548debafeSMikulas Patocka 86648debafeSMikulas Patocka e = container_of(node, struct wc_entry, rb_node); 86748debafeSMikulas Patocka } 86848debafeSMikulas Patocka 86948debafeSMikulas Patocka if (discarded_something) 870aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 87148debafeSMikulas Patocka } 87248debafeSMikulas Patocka 87348debafeSMikulas Patocka static bool writecache_wait_for_writeback(struct dm_writecache *wc) 87448debafeSMikulas Patocka { 87548debafeSMikulas Patocka if (wc->writeback_size) { 87648debafeSMikulas Patocka writecache_wait_on_freelist(wc); 87748debafeSMikulas Patocka return true; 87848debafeSMikulas Patocka } 87948debafeSMikulas Patocka return false; 88048debafeSMikulas Patocka } 88148debafeSMikulas Patocka 88248debafeSMikulas Patocka static void writecache_suspend(struct dm_target *ti) 88348debafeSMikulas Patocka { 88448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 88548debafeSMikulas Patocka bool 
flush_on_suspend; 88648debafeSMikulas Patocka 88748debafeSMikulas Patocka del_timer_sync(&wc->autocommit_timer); 8883923d485SMikulas Patocka del_timer_sync(&wc->max_age_timer); 88948debafeSMikulas Patocka 89048debafeSMikulas Patocka wc_lock(wc); 89148debafeSMikulas Patocka writecache_flush(wc); 89248debafeSMikulas Patocka flush_on_suspend = wc->flush_on_suspend; 89348debafeSMikulas Patocka if (flush_on_suspend) { 89448debafeSMikulas Patocka wc->flush_on_suspend = false; 89548debafeSMikulas Patocka wc->writeback_all++; 89648debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 89748debafeSMikulas Patocka } 89848debafeSMikulas Patocka wc_unlock(wc); 89948debafeSMikulas Patocka 900adc0daadSMikulas Patocka drain_workqueue(wc->writeback_wq); 90148debafeSMikulas Patocka 90248debafeSMikulas Patocka wc_lock(wc); 90348debafeSMikulas Patocka if (flush_on_suspend) 90448debafeSMikulas Patocka wc->writeback_all--; 90548debafeSMikulas Patocka while (writecache_wait_for_writeback(wc)); 90648debafeSMikulas Patocka 90748debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 90848debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 90948debafeSMikulas Patocka 91048debafeSMikulas Patocka writecache_poison_lists(wc); 91148debafeSMikulas Patocka 91248debafeSMikulas Patocka wc_unlock(wc); 91348debafeSMikulas Patocka } 91448debafeSMikulas Patocka 91548debafeSMikulas Patocka static int writecache_alloc_entries(struct dm_writecache *wc) 91648debafeSMikulas Patocka { 91748debafeSMikulas Patocka size_t b; 91848debafeSMikulas Patocka 91948debafeSMikulas Patocka if (wc->entries) 92048debafeSMikulas Patocka return 0; 92150a7d3baSKees Cook wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); 92248debafeSMikulas Patocka if (!wc->entries) 92348debafeSMikulas Patocka return -ENOMEM; 92448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 92548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 92648debafeSMikulas 
Patocka e->index = b; 92748debafeSMikulas Patocka e->write_in_progress = false; 92848debafeSMikulas Patocka } 92948debafeSMikulas Patocka 93048debafeSMikulas Patocka return 0; 93148debafeSMikulas Patocka } 93248debafeSMikulas Patocka 93348debafeSMikulas Patocka static void writecache_resume(struct dm_target *ti) 93448debafeSMikulas Patocka { 93548debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 93648debafeSMikulas Patocka size_t b; 93748debafeSMikulas Patocka bool need_flush = false; 93848debafeSMikulas Patocka __le64 sb_seq_count; 93948debafeSMikulas Patocka int r; 94048debafeSMikulas Patocka 94148debafeSMikulas Patocka wc_lock(wc); 94248debafeSMikulas Patocka 94348debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 94448debafeSMikulas Patocka persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); 94548debafeSMikulas Patocka 94648debafeSMikulas Patocka wc->tree = RB_ROOT; 94748debafeSMikulas Patocka INIT_LIST_HEAD(&wc->lru); 94848debafeSMikulas Patocka if (WC_MODE_SORT_FREELIST(wc)) { 94948debafeSMikulas Patocka wc->freetree = RB_ROOT; 95048debafeSMikulas Patocka wc->current_free = NULL; 95148debafeSMikulas Patocka } else { 95248debafeSMikulas Patocka INIT_LIST_HEAD(&wc->freelist); 95348debafeSMikulas Patocka } 95448debafeSMikulas Patocka wc->freelist_size = 0; 95548debafeSMikulas Patocka 95648debafeSMikulas Patocka r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); 95748debafeSMikulas Patocka if (r) { 95848debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); 95948debafeSMikulas Patocka sb_seq_count = cpu_to_le64(0); 96048debafeSMikulas Patocka } 96148debafeSMikulas Patocka wc->seq_count = le64_to_cpu(sb_seq_count); 96248debafeSMikulas Patocka 96348debafeSMikulas Patocka #ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 96448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 96548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 96648debafeSMikulas 
Patocka struct wc_memory_entry wme; 96748debafeSMikulas Patocka if (writecache_has_error(wc)) { 96848debafeSMikulas Patocka e->original_sector = -1; 96948debafeSMikulas Patocka e->seq_count = -1; 97048debafeSMikulas Patocka continue; 97148debafeSMikulas Patocka } 97248debafeSMikulas Patocka r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 97348debafeSMikulas Patocka if (r) { 97448debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", 97548debafeSMikulas Patocka (unsigned long)b, r); 97648debafeSMikulas Patocka e->original_sector = -1; 97748debafeSMikulas Patocka e->seq_count = -1; 97848debafeSMikulas Patocka } else { 97948debafeSMikulas Patocka e->original_sector = le64_to_cpu(wme.original_sector); 98048debafeSMikulas Patocka e->seq_count = le64_to_cpu(wme.seq_count); 98148debafeSMikulas Patocka } 98248debafeSMikulas Patocka } 98348debafeSMikulas Patocka #endif 98448debafeSMikulas Patocka for (b = 0; b < wc->n_blocks; b++) { 98548debafeSMikulas Patocka struct wc_entry *e = &wc->entries[b]; 98648debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) { 98748debafeSMikulas Patocka if (read_seq_count(wc, e) != -1) { 98848debafeSMikulas Patocka erase_this: 98948debafeSMikulas Patocka clear_seq_count(wc, e); 99048debafeSMikulas Patocka need_flush = true; 99148debafeSMikulas Patocka } 99248debafeSMikulas Patocka writecache_add_to_freelist(wc, e); 99348debafeSMikulas Patocka } else { 99448debafeSMikulas Patocka struct wc_entry *old; 99548debafeSMikulas Patocka 99648debafeSMikulas Patocka old = writecache_find_entry(wc, read_original_sector(wc, e), 0); 99748debafeSMikulas Patocka if (!old) { 99848debafeSMikulas Patocka writecache_insert_entry(wc, e); 99948debafeSMikulas Patocka } else { 100048debafeSMikulas Patocka if (read_seq_count(wc, old) == read_seq_count(wc, e)) { 100148debafeSMikulas Patocka writecache_error(wc, -EINVAL, 100248debafeSMikulas Patocka "two identical entries, 
position %llu, sector %llu, sequence %llu", 100348debafeSMikulas Patocka (unsigned long long)b, (unsigned long long)read_original_sector(wc, e), 100448debafeSMikulas Patocka (unsigned long long)read_seq_count(wc, e)); 100548debafeSMikulas Patocka } 100648debafeSMikulas Patocka if (read_seq_count(wc, old) > read_seq_count(wc, e)) { 100748debafeSMikulas Patocka goto erase_this; 100848debafeSMikulas Patocka } else { 100948debafeSMikulas Patocka writecache_free_entry(wc, old); 101048debafeSMikulas Patocka writecache_insert_entry(wc, e); 101148debafeSMikulas Patocka need_flush = true; 101248debafeSMikulas Patocka } 101348debafeSMikulas Patocka } 101448debafeSMikulas Patocka } 101548debafeSMikulas Patocka cond_resched(); 101648debafeSMikulas Patocka } 101748debafeSMikulas Patocka 101848debafeSMikulas Patocka if (need_flush) { 101948debafeSMikulas Patocka writecache_flush_all_metadata(wc); 1020aa950920SMikulas Patocka writecache_commit_flushed(wc, false); 102148debafeSMikulas Patocka } 102248debafeSMikulas Patocka 102341c526c5SMikulas Patocka writecache_verify_watermark(wc); 102441c526c5SMikulas Patocka 10253923d485SMikulas Patocka if (wc->max_age != MAX_AGE_UNSPECIFIED) 10263923d485SMikulas Patocka mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 10273923d485SMikulas Patocka 102848debafeSMikulas Patocka wc_unlock(wc); 102948debafeSMikulas Patocka } 103048debafeSMikulas Patocka 103148debafeSMikulas Patocka static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 103248debafeSMikulas Patocka { 103348debafeSMikulas Patocka if (argc != 1) 103448debafeSMikulas Patocka return -EINVAL; 103548debafeSMikulas Patocka 103648debafeSMikulas Patocka wc_lock(wc); 103748debafeSMikulas Patocka if (dm_suspended(wc->ti)) { 103848debafeSMikulas Patocka wc_unlock(wc); 103948debafeSMikulas Patocka return -EBUSY; 104048debafeSMikulas Patocka } 104148debafeSMikulas Patocka if (writecache_has_error(wc)) { 104248debafeSMikulas Patocka wc_unlock(wc); 
104348debafeSMikulas Patocka return -EIO; 104448debafeSMikulas Patocka } 104548debafeSMikulas Patocka 104648debafeSMikulas Patocka writecache_flush(wc); 104748debafeSMikulas Patocka wc->writeback_all++; 104848debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->writeback_work); 104948debafeSMikulas Patocka wc_unlock(wc); 105048debafeSMikulas Patocka 105148debafeSMikulas Patocka flush_workqueue(wc->writeback_wq); 105248debafeSMikulas Patocka 105348debafeSMikulas Patocka wc_lock(wc); 105448debafeSMikulas Patocka wc->writeback_all--; 105548debafeSMikulas Patocka if (writecache_has_error(wc)) { 105648debafeSMikulas Patocka wc_unlock(wc); 105748debafeSMikulas Patocka return -EIO; 105848debafeSMikulas Patocka } 105948debafeSMikulas Patocka wc_unlock(wc); 106048debafeSMikulas Patocka 106148debafeSMikulas Patocka return 0; 106248debafeSMikulas Patocka } 106348debafeSMikulas Patocka 106448debafeSMikulas Patocka static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 106548debafeSMikulas Patocka { 106648debafeSMikulas Patocka if (argc != 1) 106748debafeSMikulas Patocka return -EINVAL; 106848debafeSMikulas Patocka 106948debafeSMikulas Patocka wc_lock(wc); 107048debafeSMikulas Patocka wc->flush_on_suspend = true; 107148debafeSMikulas Patocka wc_unlock(wc); 107248debafeSMikulas Patocka 107348debafeSMikulas Patocka return 0; 107448debafeSMikulas Patocka } 107548debafeSMikulas Patocka 107693de44ebSMikulas Patocka static void activate_cleaner(struct dm_writecache *wc) 107793de44ebSMikulas Patocka { 107893de44ebSMikulas Patocka wc->flush_on_suspend = true; 107993de44ebSMikulas Patocka wc->cleaner = true; 108093de44ebSMikulas Patocka wc->freelist_high_watermark = wc->n_blocks; 108193de44ebSMikulas Patocka wc->freelist_low_watermark = wc->n_blocks; 108293de44ebSMikulas Patocka } 108393de44ebSMikulas Patocka 108493de44ebSMikulas Patocka static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc) 108593de44ebSMikulas 
Patocka { 108693de44ebSMikulas Patocka if (argc != 1) 108793de44ebSMikulas Patocka return -EINVAL; 108893de44ebSMikulas Patocka 108993de44ebSMikulas Patocka wc_lock(wc); 109093de44ebSMikulas Patocka activate_cleaner(wc); 109193de44ebSMikulas Patocka if (!dm_suspended(wc->ti)) 109293de44ebSMikulas Patocka writecache_verify_watermark(wc); 109393de44ebSMikulas Patocka wc_unlock(wc); 109493de44ebSMikulas Patocka 109593de44ebSMikulas Patocka return 0; 109693de44ebSMikulas Patocka } 109793de44ebSMikulas Patocka 109848debafeSMikulas Patocka static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, 109948debafeSMikulas Patocka char *result, unsigned maxlen) 110048debafeSMikulas Patocka { 110148debafeSMikulas Patocka int r = -EINVAL; 110248debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 110348debafeSMikulas Patocka 110448debafeSMikulas Patocka if (!strcasecmp(argv[0], "flush")) 110548debafeSMikulas Patocka r = process_flush_mesg(argc, argv, wc); 110648debafeSMikulas Patocka else if (!strcasecmp(argv[0], "flush_on_suspend")) 110748debafeSMikulas Patocka r = process_flush_on_suspend_mesg(argc, argv, wc); 110893de44ebSMikulas Patocka else if (!strcasecmp(argv[0], "cleaner")) 110993de44ebSMikulas Patocka r = process_cleaner_mesg(argc, argv, wc); 111048debafeSMikulas Patocka else 111148debafeSMikulas Patocka DMERR("unrecognised message received: %s", argv[0]); 111248debafeSMikulas Patocka 111348debafeSMikulas Patocka return r; 111448debafeSMikulas Patocka } 111548debafeSMikulas Patocka 111648debafeSMikulas Patocka static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) 111748debafeSMikulas Patocka { 111848debafeSMikulas Patocka void *buf; 111948debafeSMikulas Patocka unsigned long flags; 112048debafeSMikulas Patocka unsigned size; 112148debafeSMikulas Patocka int rw = bio_data_dir(bio); 112248debafeSMikulas Patocka unsigned remaining_size = wc->block_size; 112348debafeSMikulas Patocka 112448debafeSMikulas Patocka do 
{ 112548debafeSMikulas Patocka struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); 112648debafeSMikulas Patocka buf = bvec_kmap_irq(&bv, &flags); 112748debafeSMikulas Patocka size = bv.bv_len; 112848debafeSMikulas Patocka if (unlikely(size > remaining_size)) 112948debafeSMikulas Patocka size = remaining_size; 113048debafeSMikulas Patocka 113148debafeSMikulas Patocka if (rw == READ) { 113248debafeSMikulas Patocka int r; 113348debafeSMikulas Patocka r = memcpy_mcsafe(buf, data, size); 113448debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 113548debafeSMikulas Patocka if (unlikely(r)) { 113648debafeSMikulas Patocka writecache_error(wc, r, "hardware memory error when reading data: %d", r); 113748debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 113848debafeSMikulas Patocka } 113948debafeSMikulas Patocka } else { 114048debafeSMikulas Patocka flush_dcache_page(bio_page(bio)); 114148debafeSMikulas Patocka memcpy_flushcache(data, buf, size); 114248debafeSMikulas Patocka } 114348debafeSMikulas Patocka 114448debafeSMikulas Patocka bvec_kunmap_irq(buf, &flags); 114548debafeSMikulas Patocka 114648debafeSMikulas Patocka data = (char *)data + size; 114748debafeSMikulas Patocka remaining_size -= size; 114848debafeSMikulas Patocka bio_advance(bio, size); 114948debafeSMikulas Patocka } while (unlikely(remaining_size)); 115048debafeSMikulas Patocka } 115148debafeSMikulas Patocka 115248debafeSMikulas Patocka static int writecache_flush_thread(void *data) 115348debafeSMikulas Patocka { 115448debafeSMikulas Patocka struct dm_writecache *wc = data; 115548debafeSMikulas Patocka 115648debafeSMikulas Patocka while (1) { 115748debafeSMikulas Patocka struct bio *bio; 115848debafeSMikulas Patocka 115948debafeSMikulas Patocka wc_lock(wc); 116048debafeSMikulas Patocka bio = bio_list_pop(&wc->flush_list); 116148debafeSMikulas Patocka if (!bio) { 116248debafeSMikulas Patocka set_current_state(TASK_INTERRUPTIBLE); 116348debafeSMikulas Patocka wc_unlock(wc); 116448debafeSMikulas 
Patocka 116548debafeSMikulas Patocka if (unlikely(kthread_should_stop())) { 116648debafeSMikulas Patocka set_current_state(TASK_RUNNING); 116748debafeSMikulas Patocka break; 116848debafeSMikulas Patocka } 116948debafeSMikulas Patocka 117048debafeSMikulas Patocka schedule(); 117148debafeSMikulas Patocka continue; 117248debafeSMikulas Patocka } 117348debafeSMikulas Patocka 117448debafeSMikulas Patocka if (bio_op(bio) == REQ_OP_DISCARD) { 117548debafeSMikulas Patocka writecache_discard(wc, bio->bi_iter.bi_sector, 117648debafeSMikulas Patocka bio_end_sector(bio)); 117748debafeSMikulas Patocka wc_unlock(wc); 117848debafeSMikulas Patocka bio_set_dev(bio, wc->dev->bdev); 117948debafeSMikulas Patocka generic_make_request(bio); 118048debafeSMikulas Patocka } else { 118148debafeSMikulas Patocka writecache_flush(wc); 118248debafeSMikulas Patocka wc_unlock(wc); 118348debafeSMikulas Patocka if (writecache_has_error(wc)) 118448debafeSMikulas Patocka bio->bi_status = BLK_STS_IOERR; 118548debafeSMikulas Patocka bio_endio(bio); 118648debafeSMikulas Patocka } 118748debafeSMikulas Patocka } 118848debafeSMikulas Patocka 118948debafeSMikulas Patocka return 0; 119048debafeSMikulas Patocka } 119148debafeSMikulas Patocka 119248debafeSMikulas Patocka static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) 119348debafeSMikulas Patocka { 119448debafeSMikulas Patocka if (bio_list_empty(&wc->flush_list)) 119548debafeSMikulas Patocka wake_up_process(wc->flush_thread); 119648debafeSMikulas Patocka bio_list_add(&wc->flush_list, bio); 119748debafeSMikulas Patocka } 119848debafeSMikulas Patocka 119948debafeSMikulas Patocka static int writecache_map(struct dm_target *ti, struct bio *bio) 120048debafeSMikulas Patocka { 120148debafeSMikulas Patocka struct wc_entry *e; 120248debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 120348debafeSMikulas Patocka 120448debafeSMikulas Patocka bio->bi_private = NULL; 120548debafeSMikulas Patocka 120648debafeSMikulas Patocka 
wc_lock(wc); 120748debafeSMikulas Patocka 120848debafeSMikulas Patocka if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 120948debafeSMikulas Patocka if (writecache_has_error(wc)) 121048debafeSMikulas Patocka goto unlock_error; 121148debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 121248debafeSMikulas Patocka writecache_flush(wc); 121348debafeSMikulas Patocka if (writecache_has_error(wc)) 121448debafeSMikulas Patocka goto unlock_error; 121548debafeSMikulas Patocka goto unlock_submit; 121648debafeSMikulas Patocka } else { 121748debafeSMikulas Patocka writecache_offload_bio(wc, bio); 121848debafeSMikulas Patocka goto unlock_return; 121948debafeSMikulas Patocka } 122048debafeSMikulas Patocka } 122148debafeSMikulas Patocka 122248debafeSMikulas Patocka bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 122348debafeSMikulas Patocka 122448debafeSMikulas Patocka if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & 122548debafeSMikulas Patocka (wc->block_size / 512 - 1)) != 0)) { 122648debafeSMikulas Patocka DMERR("I/O is not aligned, sector %llu, size %u, block size %u", 122748debafeSMikulas Patocka (unsigned long long)bio->bi_iter.bi_sector, 122848debafeSMikulas Patocka bio->bi_iter.bi_size, wc->block_size); 122948debafeSMikulas Patocka goto unlock_error; 123048debafeSMikulas Patocka } 123148debafeSMikulas Patocka 123248debafeSMikulas Patocka if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { 123348debafeSMikulas Patocka if (writecache_has_error(wc)) 123448debafeSMikulas Patocka goto unlock_error; 123548debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 123648debafeSMikulas Patocka writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); 123748debafeSMikulas Patocka goto unlock_remap_origin; 123848debafeSMikulas Patocka } else { 123948debafeSMikulas Patocka writecache_offload_bio(wc, bio); 124048debafeSMikulas Patocka goto unlock_return; 124148debafeSMikulas Patocka } 124248debafeSMikulas Patocka } 124348debafeSMikulas Patocka 
124448debafeSMikulas Patocka if (bio_data_dir(bio) == READ) { 124548debafeSMikulas Patocka read_next_block: 124648debafeSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 124748debafeSMikulas Patocka if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { 124848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 124948debafeSMikulas Patocka bio_copy_block(wc, bio, memory_data(wc, e)); 125048debafeSMikulas Patocka if (bio->bi_iter.bi_size) 125148debafeSMikulas Patocka goto read_next_block; 125248debafeSMikulas Patocka goto unlock_submit; 125348debafeSMikulas Patocka } else { 125448debafeSMikulas Patocka dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); 125548debafeSMikulas Patocka bio_set_dev(bio, wc->ssd_dev->bdev); 125648debafeSMikulas Patocka bio->bi_iter.bi_sector = cache_sector(wc, e); 125748debafeSMikulas Patocka if (!writecache_entry_is_committed(wc, e)) 125848debafeSMikulas Patocka writecache_wait_for_ios(wc, WRITE); 125948debafeSMikulas Patocka goto unlock_remap; 126048debafeSMikulas Patocka } 126148debafeSMikulas Patocka } else { 126248debafeSMikulas Patocka if (e) { 126348debafeSMikulas Patocka sector_t next_boundary = 126448debafeSMikulas Patocka read_original_sector(wc, e) - bio->bi_iter.bi_sector; 126548debafeSMikulas Patocka if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { 126648debafeSMikulas Patocka dm_accept_partial_bio(bio, next_boundary); 126748debafeSMikulas Patocka } 126848debafeSMikulas Patocka } 126948debafeSMikulas Patocka goto unlock_remap_origin; 127048debafeSMikulas Patocka } 127148debafeSMikulas Patocka } else { 127248debafeSMikulas Patocka do { 1273d53f1fafSMikulas Patocka bool found_entry = false; 127448debafeSMikulas Patocka if (writecache_has_error(wc)) 127548debafeSMikulas Patocka goto unlock_error; 127648debafeSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); 127748debafeSMikulas Patocka if (e) { 127848debafeSMikulas Patocka if 
(!writecache_entry_is_committed(wc, e)) 127948debafeSMikulas Patocka goto bio_copy; 128048debafeSMikulas Patocka if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { 128148debafeSMikulas Patocka wc->overwrote_committed = true; 128248debafeSMikulas Patocka goto bio_copy; 128348debafeSMikulas Patocka } 1284d53f1fafSMikulas Patocka found_entry = true; 128593de44ebSMikulas Patocka } else { 128693de44ebSMikulas Patocka if (unlikely(wc->cleaner)) 128793de44ebSMikulas Patocka goto direct_write; 128848debafeSMikulas Patocka } 1289dcd19507SMikulas Patocka e = writecache_pop_from_freelist(wc, (sector_t)-1); 129048debafeSMikulas Patocka if (unlikely(!e)) { 1291d53f1fafSMikulas Patocka if (!found_entry) { 129293de44ebSMikulas Patocka direct_write: 1293d53f1fafSMikulas Patocka e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 1294d53f1fafSMikulas Patocka if (e) { 1295d53f1fafSMikulas Patocka sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; 1296d53f1fafSMikulas Patocka BUG_ON(!next_boundary); 1297d53f1fafSMikulas Patocka if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { 1298d53f1fafSMikulas Patocka dm_accept_partial_bio(bio, next_boundary); 1299d53f1fafSMikulas Patocka } 1300d53f1fafSMikulas Patocka } 1301d53f1fafSMikulas Patocka goto unlock_remap_origin; 1302d53f1fafSMikulas Patocka } 130348debafeSMikulas Patocka writecache_wait_on_freelist(wc); 130448debafeSMikulas Patocka continue; 130548debafeSMikulas Patocka } 130648debafeSMikulas Patocka write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); 130748debafeSMikulas Patocka writecache_insert_entry(wc, e); 130848debafeSMikulas Patocka wc->uncommitted_blocks++; 130948debafeSMikulas Patocka bio_copy: 131048debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 131148debafeSMikulas Patocka bio_copy_block(wc, bio, memory_data(wc, e)); 131248debafeSMikulas Patocka } else { 1313dcd19507SMikulas Patocka unsigned bio_size = wc->block_size; 
1314dcd19507SMikulas Patocka sector_t start_cache_sec = cache_sector(wc, e); 1315dcd19507SMikulas Patocka sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); 1316dcd19507SMikulas Patocka 1317dcd19507SMikulas Patocka while (bio_size < bio->bi_iter.bi_size) { 1318dcd19507SMikulas Patocka struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); 1319dcd19507SMikulas Patocka if (!f) 1320dcd19507SMikulas Patocka break; 1321dcd19507SMikulas Patocka write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + 1322dcd19507SMikulas Patocka (bio_size >> SECTOR_SHIFT), wc->seq_count); 1323dcd19507SMikulas Patocka writecache_insert_entry(wc, f); 1324dcd19507SMikulas Patocka wc->uncommitted_blocks++; 1325dcd19507SMikulas Patocka bio_size += wc->block_size; 1326dcd19507SMikulas Patocka current_cache_sec += wc->block_size >> SECTOR_SHIFT; 1327dcd19507SMikulas Patocka } 1328dcd19507SMikulas Patocka 132948debafeSMikulas Patocka bio_set_dev(bio, wc->ssd_dev->bdev); 1330dcd19507SMikulas Patocka bio->bi_iter.bi_sector = start_cache_sec; 1331dcd19507SMikulas Patocka dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); 1332dcd19507SMikulas Patocka 133348debafeSMikulas Patocka if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { 133448debafeSMikulas Patocka wc->uncommitted_blocks = 0; 133548debafeSMikulas Patocka queue_work(wc->writeback_wq, &wc->flush_work); 133648debafeSMikulas Patocka } else { 133748debafeSMikulas Patocka writecache_schedule_autocommit(wc); 133848debafeSMikulas Patocka } 133948debafeSMikulas Patocka goto unlock_remap; 134048debafeSMikulas Patocka } 134148debafeSMikulas Patocka } while (bio->bi_iter.bi_size); 134248debafeSMikulas Patocka 1343c1005322SMaged Mokhtar if (unlikely(bio->bi_opf & REQ_FUA || 1344c1005322SMaged Mokhtar wc->uncommitted_blocks >= wc->autocommit_blocks)) 134548debafeSMikulas Patocka writecache_flush(wc); 134648debafeSMikulas Patocka else 134748debafeSMikulas Patocka 
writecache_schedule_autocommit(wc); 134848debafeSMikulas Patocka goto unlock_submit; 134948debafeSMikulas Patocka } 135048debafeSMikulas Patocka 135148debafeSMikulas Patocka unlock_remap_origin: 135248debafeSMikulas Patocka bio_set_dev(bio, wc->dev->bdev); 135348debafeSMikulas Patocka wc_unlock(wc); 135448debafeSMikulas Patocka return DM_MAPIO_REMAPPED; 135548debafeSMikulas Patocka 135648debafeSMikulas Patocka unlock_remap: 135748debafeSMikulas Patocka /* make sure that writecache_end_io decrements bio_in_progress: */ 135848debafeSMikulas Patocka bio->bi_private = (void *)1; 135948debafeSMikulas Patocka atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); 136048debafeSMikulas Patocka wc_unlock(wc); 136148debafeSMikulas Patocka return DM_MAPIO_REMAPPED; 136248debafeSMikulas Patocka 136348debafeSMikulas Patocka unlock_submit: 136448debafeSMikulas Patocka wc_unlock(wc); 136548debafeSMikulas Patocka bio_endio(bio); 136648debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 136748debafeSMikulas Patocka 136848debafeSMikulas Patocka unlock_return: 136948debafeSMikulas Patocka wc_unlock(wc); 137048debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 137148debafeSMikulas Patocka 137248debafeSMikulas Patocka unlock_error: 137348debafeSMikulas Patocka wc_unlock(wc); 137448debafeSMikulas Patocka bio_io_error(bio); 137548debafeSMikulas Patocka return DM_MAPIO_SUBMITTED; 137648debafeSMikulas Patocka }

/*
 * Target end_io hook.
 *
 * A non-NULL bi_private marks a bio that the map path remapped and counted
 * in bio_in_progress[] (it stores (void *)1 there).  For such a bio, drop
 * the per-direction counter and, when it reaches zero, wake anybody
 * sleeping on the matching wait queue.  @status is not examined.
 *
 * Always returns 0.
 */
static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
{
	struct dm_writecache *wc = ti->private;

	if (bio->bi_private != NULL) {
		int dir = bio_data_dir(bio);
		if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
			if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir])))
				wake_up(&wc->bio_in_progress_wait[dir]);
	}
	return 0;
}

/*
 * Report the origin device (wc->dev), spanning the whole target
 * (offset 0, length ti->len), to @fn and return whatever @fn returns.
 */
static int writecache_iterate_devices(struct dm_target *ti,
				      iterate_devices_callout_fn fn, void *data)
{
	struct dm_writecache *wc = ti->private;

	return fn(ti, wc->dev, 0, ti->len, data);
}

/*
 * Raise the logical block size, physical block size and minimum I/O size
 * in @limits to at least the cache block size.  Values that are already
 * larger are left alone.
 */
static void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dm_writecache *wc = ti->private;

	if (limits->logical_block_size < wc->block_size)
		limits->logical_block_size = wc->block_size;

	if (limits->physical_block_size < wc->block_size)
		limits->physical_block_size = wc->block_size;

	if (limits->io_min < wc->block_size)
		limits->io_min = wc->block_size;
}


/*
 * bi_end_io callback of the writeback bios built by
 * __writecache_writeback_pmem().
 *
 * Appends the finished writeback_struct to wc->endio_list under
 * endio_list_lock.  The endio thread is woken only when the list was empty
 * before the insertion; otherwise it still has work queued and will find
 * the new element on its own.
 */
static void writecache_writeback_endio(struct bio *bio)
{
	struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio);
	struct dm_writecache *wc = wb->wc;
	unsigned long flags;

	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
	if (unlikely(list_empty(&wc->endio_list)))
		wake_up_process(wc->endio_thread);
	list_add_tail(&wb->endio_entry, &wc->endio_list);
	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
}

/*
 * Completion callback handed to dm_kcopyd_copy() by
 * __writecache_writeback_ssd().
 *
 * Collapses the read/write error indications into c->error (0 or -EIO) and
 * queues the copy_struct on wc->endio_list, waking the endio thread if the
 * list was empty.
 */
static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
{
	struct copy_struct *c = ptr;
	struct dm_writecache *wc = c->wc;

	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;

	raw_spin_lock_irq(&wc->endio_list_lock);
	if (unlikely(list_empty(&wc->endio_list)))
		wake_up_process(wc->endio_thread);
	list_add_tail(&c->endio_entry, &wc->endio_list);
	raw_spin_unlock_irq(&wc->endio_list_lock);
}

/*
 * Retire every writeback_struct on @list (pmem mode).
 *
 * @list must not be empty (the loops are do/while).  The caller,
 * writecache_endio_thread(), holds the wc lock; the lock is briefly dropped
 * and re-taken after every ENDIO_LATENCY entries so that other lock users
 * are not starved.
 *
 * For each entry the write_in_progress flag is cleared and, unless the
 * cache is in an error state, the entry is returned via
 * writecache_free_entry().  A failed bio is reported through
 * writecache_error().
 */
static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
{
	unsigned i;
	struct writeback_struct *wb;
	struct wc_entry *e;
	unsigned long n_walked = 0;

	do {
		wb = list_entry(list->next, struct writeback_struct, endio_entry);
		list_del(&wb->endio_entry);

		if (unlikely(wb->bio.bi_status != BLK_STS_OK))
			writecache_error(wc, blk_status_to_errno(wb->bio.bi_status),
					"write error %d", wb->bio.bi_status);
		i = 0;
		do {
			e = wb->wc_list[i];
			BUG_ON(!e->write_in_progress);
			e->write_in_progress = false;
			INIT_LIST_HEAD(&e->lru);
			if (!writecache_has_error(wc))
				writecache_free_entry(wc, e);
			BUG_ON(!wc->writeback_size);
			wc->writeback_size--;
			n_walked++;
			if (unlikely(n_walked >= ENDIO_LATENCY)) {
				/* commit what was freed so far before letting others in */
				writecache_commit_flushed(wc, false);
				wc_unlock(wc);
				wc_lock(wc);
				n_walked = 0;
			}
		} while (++i < wb->wc_list_n);

		/* wc_list is heap-allocated only when it did not fit inline */
		if (wb->wc_list != wb->wc_list_inline)
			kfree(wb->wc_list);
		bio_put(&wb->bio);
	} while (!list_empty(list));
}

/*
 * Retire every copy_struct on @list (ssd mode).
 *
 * @list must not be empty.  Called by writecache_endio_thread() with the
 * wc lock held.  The c->n_entries entries belonging to one copy are
 * consecutive elements starting at c->e, hence the plain e++ walk.
 */
static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list)
{
	struct copy_struct *c;
	struct wc_entry *e;

	do {
		c = list_entry(list->next, struct copy_struct, endio_entry);
		list_del(&c->endio_entry);

		if (unlikely(c->error))
			writecache_error(wc, c->error, "copy error");

		e = c->e;
		do {
			BUG_ON(!e->write_in_progress);
			e->write_in_progress = false;
			INIT_LIST_HEAD(&e->lru);
			if (!writecache_has_error(wc))
				writecache_free_entry(wc, e);

			BUG_ON(!wc->writeback_size);
			wc->writeback_size--;
			e++;
		} while (--c->n_entries);
		mempool_free(c, &wc->copy_pool);
	} while (!list_empty(list));
}

/*
 * Kernel thread that finishes writeback.
 *
 * Sleeps until a completion callback queues something on wc->endio_list,
 * then steals the whole list, flushes the origin device (unless FUA mode
 * is in use), and retires the entries under the wc lock.  Exits with 0
 * once kthread_should_stop() is true and the list is empty.
 */
static int writecache_endio_thread(void *data)
{
	struct dm_writecache *wc = data;

	while (1) {
		struct list_head list;

		raw_spin_lock_irq(&wc->endio_list_lock);
		if (!list_empty(&wc->endio_list))
			goto pop_from_list;
		/*
		 * The task state is changed while the lock is still held, so a
		 * producer that queues work after we unlock will set us back
		 * to running via wake_up_process() and schedule() returns.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		raw_spin_unlock_irq(&wc->endio_list_lock);

		if (unlikely(kthread_should_stop())) {
			set_current_state(TASK_RUNNING);
			break;
		}

		schedule();

		continue;

pop_from_list:
		/* move the whole queue onto the on-stack head and re-link its ends */
		list = wc->endio_list;
		list.next->prev = list.prev->next = &list;
		INIT_LIST_HEAD(&wc->endio_list);
		raw_spin_unlock_irq(&wc->endio_list_lock);

		if (!WC_MODE_FUA(wc))
			writecache_disk_flush(wc, wc->dev);

		wc_lock(wc);

		if (WC_MODE_PMEM(wc)) {
			__writecache_endio_pmem(wc, &list);
		} else {
			__writecache_endio_ssd(wc, &list);
			writecache_wait_for_ios(wc, READ);
		}

		writecache_commit_flushed(wc, false);

		wc_unlock(wc);
	}

	return 0;
}

/*
 * Append the cache block of entry @e to the bio embedded in @wb.
 *
 * The block is flushed with persistent_memory_flush_cache() first.
 * Returns true when bio_add_page() accepted the page, false when it
 * returned 0.  @gfp is accepted for the callers' benefit but is not used
 * here.
 */
static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp)
{
	struct dm_writecache *wc = wb->wc;
	unsigned block_size = wc->block_size;
	void *address = memory_data(wc, e);

	persistent_memory_flush_cache(address, block_size);
	return bio_add_page(&wb->bio, persistent_memory_page(address),
			    block_size, persistent_memory_page_offset(address)) != 0;
}

/* Entries selected for one writeback pass, linked through wc_entry.lru. */
struct writeback_list {
	struct list_head list;
	size_t size;		/* number of entries currently on @list */
};

/*
 * Limit the amount of writeback in flight.
 *
 * When max_writeback_jobs is non-zero, wait on the freelist until the
 * number of submitted-but-unfinished blocks (writeback_size minus what is
 * still waiting on @wbl) drops below the limit.  The unlocked READ_ONCE()
 * test is only a cheap pre-check; the decision is re-made under the wc
 * lock.  Always offers a reschedule point.
 */
static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl)
{
	if (unlikely(wc->max_writeback_jobs)) {
		if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) {
			wc_lock(wc);
			while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs)
				writecache_wait_on_freelist(wc);
			wc_unlock(wc);
		}
	}
	cond_resched();
}

/*
 * Submit the entries on @wbl to the origin device (pmem mode).
 *
 * Entries are taken from the tail of the list.  Each bio starts with one
 * entry and is extended with following entries as long as they target the
 * next block on the origin device and still fit.  If the cache is already
 * in an error state the bio is completed with BLK_STS_IOERR instead of
 * being submitted, so the normal endio path still cleans up.
 */
static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl)
{
	struct wc_entry *e, *f;
	struct bio *bio;
	struct writeback_struct *wb;
	unsigned max_pages;

	while (wbl->size) {
		wbl->size--;
		e = container_of(wbl->list.prev, struct wc_entry, lru);
		list_del(&e->lru);

		max_pages = e->wc_list_contiguous;

		bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set);
		wb = container_of(bio, struct writeback_struct, bio);
		wb->wc = wc;
		bio->bi_end_io = writecache_writeback_endio;
		bio_set_dev(bio, wc->dev->bdev);
		bio->bi_iter.bi_sector = read_original_sector(wc, e);
		/*
		 * Use the inline entry array when it is big enough, or as a
		 * fallback when the opportunistic allocation fails; in both
		 * cases the bio is capped at WB_LIST_INLINE entries.
		 */
		if (max_pages <= WB_LIST_INLINE ||
		    unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
							   GFP_NOIO | __GFP_NORETRY |
							   __GFP_NOMEMALLOC | __GFP_NOWARN)))) {
			wb->wc_list = wb->wc_list_inline;
			max_pages = WB_LIST_INLINE;
		}

		BUG_ON(!wc_add_block(wb, e, GFP_NOIO));

		wb->wc_list[0] = e;
		wb->wc_list_n = 1;

		while (wbl->size && wb->wc_list_n < max_pages) {
			f = container_of(wbl->list.prev, struct wc_entry, lru);
			/* stop at the first entry that is not the next origin block */
			if (read_original_sector(wc, f) !=
			    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
				break;
			if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN))
				break;
			wbl->size--;
			list_del(&f->lru);
			wb->wc_list[wb->wc_list_n++] = f;
			e = f;
		}
		bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
		if (writecache_has_error(wc)) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
		} else {
			submit_bio(bio);
		}

		__writeback_throttle(wc, wbl);
	}
}

/*
 * Copy the entries on @wbl from the cache device to the origin device
 * through kcopyd (ssd mode).
 *
 * The first entry of each run carries the run length in
 * wc_list_contiguous.  The remaining entries of the run are expected to be
 * the next elements both on the list and in memory (BUG_ON(f != e + 1));
 * they are only unlinked here, the copy itself is described by the single
 * from/to region pair.
 */
static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl)
{
	struct wc_entry *e, *f;
	struct dm_io_region from, to;
	struct copy_struct *c;

	while (wbl->size) {
		unsigned n_sectors;

		wbl->size--;
		e = container_of(wbl->list.prev, struct wc_entry, lru);
		list_del(&e->lru);

		n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT);

		from.bdev = wc->ssd_dev->bdev;
		from.sector = cache_sector(wc, e);
		from.count = n_sectors;
		to.bdev = wc->dev->bdev;
		to.sector = read_original_sector(wc, e);
		to.count = n_sectors;

		c = mempool_alloc(&wc->copy_pool, GFP_NOIO);
		c->wc = wc;
		c->e = e;
		c->n_entries = e->wc_list_contiguous;

		/* consume the rest of the run; one block was accounted above */
		while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) {
			wbl->size--;
			f = container_of(wbl->list.prev, struct wc_entry, lru);
			BUG_ON(f != e + 1);
			list_del(&f->lru);
			e = f;
		}

		dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);

		__writeback_throttle(wc, wbl);
	}
}

/*
 * Writeback work item.
 *
 * Under the wc lock, picks entries to write back and groups them into
 * runs of consecutive origin blocks on a private list; then, with the lock
 * released, hands that list to the pmem or ssd submission helper inside a
 * block plug.
 *
 * Entries are picked while the LRU is non-empty and any of these holds:
 * writeback_all is set, freelist_size + writeback_size is at or below
 * freelist_low_watermark, or the oldest LRU entry is within
 * max_age / MAX_AGE_DIV of max_age.
 *
 * Returns early without doing anything when the cache is in an error
 * state.
 */
static void writecache_writeback(struct work_struct *work)
{
	struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
	struct blk_plug plug;
	struct wc_entry *f, *uninitialized_var(g), *e = NULL;
	struct rb_node *node, *next_node;
	struct list_head skipped;
	struct writeback_list wbl;
	unsigned long n_walked;

	wc_lock(wc);
restart:
	if (writecache_has_error(wc)) {
		wc_unlock(wc);
		return;
	}

	if (unlikely(wc->writeback_all)) {
		if (writecache_wait_for_writeback(wc))
			goto restart;
	}

	if (wc->overwrote_committed) {
		writecache_wait_for_ios(wc, WRITE);
	}

	n_walked = 0;
	INIT_LIST_HEAD(&skipped);
	INIT_LIST_HEAD(&wbl.list);
	wbl.size = 0;
	while (!list_empty(&wc->lru) &&
	       (wc->writeback_all ||
		wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark ||
		(jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >=
		 wc->max_age - wc->max_age / MAX_AGE_DIV))) {

		/*
		 * Bound the time spent under the lock: after
		 * WRITEBACK_LATENCY steps re-queue ourselves and submit what
		 * has been collected, unless everything must be written or
		 * the device is suspended.
		 */
		n_walked++;
		if (unlikely(n_walked > WRITEBACK_LATENCY) &&
		    likely(!wc->writeback_all) && likely(!dm_suspended(wc->ti))) {
			queue_work(wc->writeback_wq, &wc->writeback_work);
			break;
		}

		/*
		 * writeback_all walks the tree in sector order: start at the
		 * first node, afterwards continue from the node (g) where the
		 * previous run ended.  Otherwise take the LRU tail.
		 */
		if (unlikely(wc->writeback_all)) {
			if (unlikely(!e)) {
				writecache_flush(wc);
				e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node);
			} else
				e = g;
		} else
			e = container_of(wc->lru.prev, struct wc_entry, lru);
		BUG_ON(e->write_in_progress);
		if (unlikely(!writecache_entry_is_committed(wc, e))) {
			writecache_flush(wc);
		}
		/*
		 * If the preceding tree node maps the same origin sector it
		 * must already be under writeback; park this entry on the
		 * skipped list until that finishes.
		 */
		node = rb_prev(&e->rb_node);
		if (node) {
			f = container_of(node, struct wc_entry, rb_node);
			if (unlikely(read_original_sector(wc, f) ==
				     read_original_sector(wc, e))) {
				BUG_ON(!f->write_in_progress);
				list_del(&e->lru);
				list_add(&e->lru, &skipped);
				cond_resched();
				continue;
			}
		}
		wc->writeback_size++;
		list_del(&e->lru);
		list_add(&e->lru, &wbl.list);
		wbl.size++;
		e->write_in_progress = true;
		e->wc_list_contiguous = 1;

		f = e;

		/* extend the run headed by e with the following tree nodes */
		while (1) {
			next_node = rb_next(&f->rb_node);
			if (unlikely(!next_node))
				break;
			g = container_of(next_node, struct wc_entry, rb_node);
			/* another entry for the same sector: step over it */
			if (unlikely(read_original_sector(wc, g) ==
			    read_original_sector(wc, f))) {
				f = g;
				continue;
			}
			if (read_original_sector(wc, g) !=
			    read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT))
				break;
			if (unlikely(g->write_in_progress))
				break;
			if (unlikely(!writecache_entry_is_committed(wc, g)))
				break;

			/*
			 * ssd mode copies a run with a single region, so the
			 * entries must also be adjacent in the entry array.
			 */
			if (!WC_MODE_PMEM(wc)) {
				if (g != f + 1)
					break;
			}

			n_walked++;

			wc->writeback_size++;
			list_del(&g->lru);
			list_add(&g->lru, &wbl.list);
			wbl.size++;
			g->write_in_progress = true;
			/*
			 * Only a run's first entry has its count read by the
			 * submission helpers.  A follower is read only if the
			 * pmem helper cuts a run short and restarts from it,
			 * in which case this upper bound is what it sees.
			 */
			g->wc_list_contiguous = BIO_MAX_PAGES;
			f = g;
			e->wc_list_contiguous++;
			if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) {
				/* remember where the next writeback_all run starts */
				if (unlikely(wc->writeback_all)) {
					next_node = rb_next(&f->rb_node);
					if (likely(next_node))
						g = container_of(next_node, struct wc_entry, rb_node);
				}
				break;
			}
		}
		cond_resched();
	}

	if (!list_empty(&skipped)) {
		list_splice_tail(&skipped, &wc->lru);
		/*
		 * If we didn't do any progress, we must wait until some
		 * writeback finishes to avoid burning CPU in a loop
		 */
		if (unlikely(!wbl.size))
			writecache_wait_for_writeback(wc);
	}

	wc_unlock(wc);

	blk_start_plug(&plug);

	if (WC_MODE_PMEM(wc))
		__writecache_writeback_pmem(wc, &wbl);
	else
		__writecache_writeback_ssd(wc, &wbl);

	blk_finish_plug(&plug);

	if (unlikely(wc->writeback_all)) {
		wc_lock(wc);
		while (writecache_wait_for_writeback(wc));
		wc_unlock(wc);
	}
}

/*
 * Work out how many cache blocks fit on a device of @device_size bytes.
 *
 * The layout is a superblock followed by one wc_memory_entry per block,
 * rounded up to a multiple of @block_size, followed by the data blocks.
 * @block_size is treated as a power of two (it is used as a mask and with
 * __ffs()).  The first estimate ignores the superblock and the rounding,
 * so it is decremented until the layout fits.
 *
 * On success stores the block count in *@n_blocks_p and the number of
 * metadata blocks in *@n_metadata_blocks_p (either may be NULL) and
 * returns 0.  Returns -ENOSPC if not even one block fits, -EFBIG if the
 * entry array would overflow size_t or the count does not fit in
 * wc_entry.index.
 */
static int calculate_memory_size(uint64_t device_size, unsigned block_size,
				 size_t *n_blocks_p, size_t *n_metadata_blocks_p)
{
	uint64_t n_blocks, offset;
	struct wc_entry e;

	n_blocks = device_size;
	do_div(n_blocks, block_size + sizeof(struct wc_memory_entry));

	while (1) {
		if (!n_blocks)
			return -ENOSPC;
		/* Verify the following entries[n_blocks] won't overflow */
		if (n_blocks >= ((size_t)-sizeof(struct wc_memory_superblock) /
				 sizeof(struct wc_memory_entry)))
			return -EFBIG;
		offset = offsetof(struct wc_memory_superblock, entries[n_blocks]);
		offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1);
		if (offset + n_blocks * block_size <= device_size)
			break;
		n_blocks--;
	}

	/* check if the bit field overflows */
	e.index = n_blocks;
	if (e.index != n_blocks)
		return -EFBIG;

	if (n_blocks_p)
		*n_blocks_p = n_blocks;
	if (n_metadata_blocks_p)
		*n_metadata_blocks_p = offset >> __ffs(block_size);
	return 0;
}

/*
 * Format the cache metadata from scratch.
 *
 * Sizes the cache, allocates the in-core entries, zeroes the superblock,
 * fills in version, block size, block count and sequence count, and marks
 * every entry unused (-1 / -1).  The magic number is written and committed
 * in a separate, final step, after everything else has been flushed and
 * committed.
 *
 * Returns 0 on success or the error from calculate_memory_size() /
 * writecache_alloc_entries().
 */
static int init_memory(struct dm_writecache *wc)
{
	size_t b;
	int r;

	r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL);
	if (r)
		return r;

	r = writecache_alloc_entries(wc);
	if (r)
		return r;

	for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++)
		pmem_assign(sb(wc)->padding[b], cpu_to_le64(0));
	pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION));
	pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size));
	pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
	pmem_assign(sb(wc)->seq_count, cpu_to_le64(0));

	for (b = 0; b < wc->n_blocks; b++) {
		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
		/* n_blocks can be very large; don't monopolise the CPU */
		cond_resched();
	}

	writecache_flush_all_metadata(wc);
	writecache_commit_flushed(wc, false);
	pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
	writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
	writecache_commit_flushed(wc, false);

	return 0;
}

/*
 * Target destructor.
 *
 * Tolerates a partially constructed target: ti->private may be NULL, and
 * each resource that has no NULL-safe release function is released only
 * if it was set up.
 */
static void writecache_dtr(struct dm_target *ti)
{
	struct dm_writecache *wc = ti->private;

	if (!wc)
		return;

	if (wc->endio_thread)
		kthread_stop(wc->endio_thread);

	if (wc->flush_thread)
		kthread_stop(wc->flush_thread);

	bioset_exit(&wc->bio_set);

	mempool_exit(&wc->copy_pool);

	if (wc->writeback_wq)
		destroy_workqueue(wc->writeback_wq);

	if (wc->dev)
		dm_put_device(ti, wc->dev);

	if (wc->ssd_dev)
		dm_put_device(ti, wc->ssd_dev);

	/* vfree(NULL) is a no-op, no guard needed */
	vfree(wc->entries);

	/* the NULL test stays: it also guards persistent_memory_release() */
	if (wc->memory_map) {
		if (WC_MODE_PMEM(wc))
			persistent_memory_release(wc);
		else
			vfree(wc->memory_map);
	}

	if (wc->dm_kcopyd)
		dm_kcopyd_client_destroy(wc->dm_kcopyd);

	if (wc->dm_io)
		dm_io_client_destroy(wc->dm_io);

	vfree(wc->dirty_bitmap);

	kfree(wc);
}

194948debafeSMikulas Patocka static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) 195048debafeSMikulas Patocka { 195148debafeSMikulas Patocka struct dm_writecache *wc; 195248debafeSMikulas Patocka struct dm_arg_set as; 195348debafeSMikulas Patocka const char *string; 195448debafeSMikulas Patocka unsigned opt_params; 195548debafeSMikulas Patocka size_t offset, data_size; 195648debafeSMikulas Patocka int i, r; 195748debafeSMikulas Patocka char dummy; 195848debafeSMikulas Patocka int high_wm_percent = HIGH_WATERMARK; 195948debafeSMikulas Patocka int low_wm_percent = LOW_WATERMARK; 196048debafeSMikulas Patocka uint64_t x; 196148debafeSMikulas Patocka struct wc_memory_superblock s;
196248debafeSMikulas Patocka 196348debafeSMikulas Patocka static struct dm_arg _args[] = { 196448debafeSMikulas Patocka {0, 10, "Invalid number of feature args"}, 196548debafeSMikulas Patocka }; 196648debafeSMikulas Patocka 196748debafeSMikulas Patocka as.argc = argc; 196848debafeSMikulas Patocka as.argv = argv; 196948debafeSMikulas Patocka 197048debafeSMikulas Patocka wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL); 197148debafeSMikulas Patocka if (!wc) { 197248debafeSMikulas Patocka ti->error = "Cannot allocate writecache structure"; 197348debafeSMikulas Patocka r = -ENOMEM; 197448debafeSMikulas Patocka goto bad; 197548debafeSMikulas Patocka } 197648debafeSMikulas Patocka ti->private = wc; 197748debafeSMikulas Patocka wc->ti = ti; 197848debafeSMikulas Patocka 197948debafeSMikulas Patocka mutex_init(&wc->lock); 19803923d485SMikulas Patocka wc->max_age = MAX_AGE_UNSPECIFIED; 198148debafeSMikulas Patocka writecache_poison_lists(wc); 198248debafeSMikulas Patocka init_waitqueue_head(&wc->freelist_wait); 198348debafeSMikulas Patocka timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0); 19843923d485SMikulas Patocka timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0); 198548debafeSMikulas Patocka 198648debafeSMikulas Patocka for (i = 0; i < 2; i++) { 198748debafeSMikulas Patocka atomic_set(&wc->bio_in_progress[i], 0); 198848debafeSMikulas Patocka init_waitqueue_head(&wc->bio_in_progress_wait[i]); 198948debafeSMikulas Patocka } 199048debafeSMikulas Patocka 199148debafeSMikulas Patocka wc->dm_io = dm_io_client_create(); 199248debafeSMikulas Patocka if (IS_ERR(wc->dm_io)) { 199348debafeSMikulas Patocka r = PTR_ERR(wc->dm_io); 199448debafeSMikulas Patocka ti->error = "Unable to allocate dm-io client"; 199548debafeSMikulas Patocka wc->dm_io = NULL; 199648debafeSMikulas Patocka goto bad; 199748debafeSMikulas Patocka } 199848debafeSMikulas Patocka 1999f87e033bSHuaisheng Ye wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 
1); 200048debafeSMikulas Patocka if (!wc->writeback_wq) { 200148debafeSMikulas Patocka r = -ENOMEM; 200248debafeSMikulas Patocka ti->error = "Could not allocate writeback workqueue"; 200348debafeSMikulas Patocka goto bad; 200448debafeSMikulas Patocka } 200548debafeSMikulas Patocka INIT_WORK(&wc->writeback_work, writecache_writeback); 200648debafeSMikulas Patocka INIT_WORK(&wc->flush_work, writecache_flush_work); 200748debafeSMikulas Patocka 200848debafeSMikulas Patocka raw_spin_lock_init(&wc->endio_list_lock); 200948debafeSMikulas Patocka INIT_LIST_HEAD(&wc->endio_list); 201048debafeSMikulas Patocka wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio"); 201148debafeSMikulas Patocka if (IS_ERR(wc->endio_thread)) { 201248debafeSMikulas Patocka r = PTR_ERR(wc->endio_thread); 201348debafeSMikulas Patocka wc->endio_thread = NULL; 201448debafeSMikulas Patocka ti->error = "Couldn't spawn endio thread"; 201548debafeSMikulas Patocka goto bad; 201648debafeSMikulas Patocka } 201748debafeSMikulas Patocka wake_up_process(wc->endio_thread); 201848debafeSMikulas Patocka 201948debafeSMikulas Patocka /* 202048debafeSMikulas Patocka * Parse the mode (pmem or ssd) 202148debafeSMikulas Patocka */ 202248debafeSMikulas Patocka string = dm_shift_arg(&as); 202348debafeSMikulas Patocka if (!string) 202448debafeSMikulas Patocka goto bad_arguments; 202548debafeSMikulas Patocka 202648debafeSMikulas Patocka if (!strcasecmp(string, "s")) { 202748debafeSMikulas Patocka wc->pmem_mode = false; 202848debafeSMikulas Patocka } else if (!strcasecmp(string, "p")) { 202948debafeSMikulas Patocka #ifdef DM_WRITECACHE_HAS_PMEM 203048debafeSMikulas Patocka wc->pmem_mode = true; 203148debafeSMikulas Patocka wc->writeback_fua = true; 203248debafeSMikulas Patocka #else 203348debafeSMikulas Patocka /* 203448debafeSMikulas Patocka * If the architecture doesn't support persistent memory or 203548debafeSMikulas Patocka * the kernel doesn't support any DAX drivers, this driver can 
203648debafeSMikulas Patocka * only be used in SSD-only mode. 203748debafeSMikulas Patocka */ 203848debafeSMikulas Patocka r = -EOPNOTSUPP; 203948debafeSMikulas Patocka ti->error = "Persistent memory or DAX not supported on this system"; 204048debafeSMikulas Patocka goto bad; 204148debafeSMikulas Patocka #endif 204248debafeSMikulas Patocka } else { 204348debafeSMikulas Patocka goto bad_arguments; 204448debafeSMikulas Patocka } 204548debafeSMikulas Patocka 204648debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 204748debafeSMikulas Patocka r = bioset_init(&wc->bio_set, BIO_POOL_SIZE, 204848debafeSMikulas Patocka offsetof(struct writeback_struct, bio), 204948debafeSMikulas Patocka BIOSET_NEED_BVECS); 205048debafeSMikulas Patocka if (r) { 205148debafeSMikulas Patocka ti->error = "Could not allocate bio set"; 205248debafeSMikulas Patocka goto bad; 205348debafeSMikulas Patocka } 205448debafeSMikulas Patocka } else { 205548debafeSMikulas Patocka r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct)); 205648debafeSMikulas Patocka if (r) { 205748debafeSMikulas Patocka ti->error = "Could not allocate mempool"; 205848debafeSMikulas Patocka goto bad; 205948debafeSMikulas Patocka } 206048debafeSMikulas Patocka } 206148debafeSMikulas Patocka 206248debafeSMikulas Patocka /* 206348debafeSMikulas Patocka * Parse the origin data device 206448debafeSMikulas Patocka */ 206548debafeSMikulas Patocka string = dm_shift_arg(&as); 206648debafeSMikulas Patocka if (!string) 206748debafeSMikulas Patocka goto bad_arguments; 206848debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev); 206948debafeSMikulas Patocka if (r) { 207048debafeSMikulas Patocka ti->error = "Origin data device lookup failed"; 207148debafeSMikulas Patocka goto bad; 207248debafeSMikulas Patocka } 207348debafeSMikulas Patocka 207448debafeSMikulas Patocka /* 207548debafeSMikulas Patocka * Parse cache data device (be it pmem or ssd) 207648debafeSMikulas Patocka */ 
207748debafeSMikulas Patocka string = dm_shift_arg(&as); 207848debafeSMikulas Patocka if (!string) 207948debafeSMikulas Patocka goto bad_arguments; 208048debafeSMikulas Patocka 208148debafeSMikulas Patocka r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev); 208248debafeSMikulas Patocka if (r) { 208348debafeSMikulas Patocka ti->error = "Cache data device lookup failed"; 208448debafeSMikulas Patocka goto bad; 208548debafeSMikulas Patocka } 208648debafeSMikulas Patocka wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode); 208748debafeSMikulas Patocka 208848debafeSMikulas Patocka /* 208948debafeSMikulas Patocka * Parse the cache block size 209048debafeSMikulas Patocka */ 209148debafeSMikulas Patocka string = dm_shift_arg(&as); 209248debafeSMikulas Patocka if (!string) 209348debafeSMikulas Patocka goto bad_arguments; 209448debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 || 209548debafeSMikulas Patocka wc->block_size < 512 || wc->block_size > PAGE_SIZE || 209648debafeSMikulas Patocka (wc->block_size & (wc->block_size - 1))) { 209748debafeSMikulas Patocka r = -EINVAL; 209848debafeSMikulas Patocka ti->error = "Invalid block size"; 209948debafeSMikulas Patocka goto bad; 210048debafeSMikulas Patocka } 210148debafeSMikulas Patocka wc->block_size_bits = __ffs(wc->block_size); 210248debafeSMikulas Patocka 210348debafeSMikulas Patocka wc->max_writeback_jobs = MAX_WRITEBACK_JOBS; 210448debafeSMikulas Patocka wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? 
AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM; 210548debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC); 210648debafeSMikulas Patocka 210748debafeSMikulas Patocka /* 210848debafeSMikulas Patocka * Parse optional arguments 210948debafeSMikulas Patocka */ 211048debafeSMikulas Patocka r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); 211148debafeSMikulas Patocka if (r) 211248debafeSMikulas Patocka goto bad; 211348debafeSMikulas Patocka 211448debafeSMikulas Patocka while (opt_params) { 211548debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2116d284f824SMikulas Patocka if (!strcasecmp(string, "start_sector") && opt_params >= 1) { 2117d284f824SMikulas Patocka unsigned long long start_sector; 2118d284f824SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 2119d284f824SMikulas Patocka if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) 2120d284f824SMikulas Patocka goto invalid_optional; 2121d284f824SMikulas Patocka wc->start_sector = start_sector; 2122d284f824SMikulas Patocka if (wc->start_sector != start_sector || 2123d284f824SMikulas Patocka wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) 2124d284f824SMikulas Patocka goto invalid_optional; 2125d284f824SMikulas Patocka } else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) { 212648debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 212748debafeSMikulas Patocka if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1) 212848debafeSMikulas Patocka goto invalid_optional; 212948debafeSMikulas Patocka if (high_wm_percent < 0 || high_wm_percent > 100) 213048debafeSMikulas Patocka goto invalid_optional; 213148debafeSMikulas Patocka wc->high_wm_percent_set = true; 213248debafeSMikulas Patocka } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { 213348debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 213448debafeSMikulas Patocka if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1) 
213548debafeSMikulas Patocka goto invalid_optional; 213648debafeSMikulas Patocka if (low_wm_percent < 0 || low_wm_percent > 100) 213748debafeSMikulas Patocka goto invalid_optional; 213848debafeSMikulas Patocka wc->low_wm_percent_set = true; 213948debafeSMikulas Patocka } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { 214048debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 214148debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1) 214248debafeSMikulas Patocka goto invalid_optional; 214348debafeSMikulas Patocka wc->max_writeback_jobs_set = true; 214448debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) { 214548debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 214648debafeSMikulas Patocka if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1) 214748debafeSMikulas Patocka goto invalid_optional; 214848debafeSMikulas Patocka wc->autocommit_blocks_set = true; 214948debafeSMikulas Patocka } else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) { 215048debafeSMikulas Patocka unsigned autocommit_msecs; 215148debafeSMikulas Patocka string = dm_shift_arg(&as), opt_params--; 215248debafeSMikulas Patocka if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1) 215348debafeSMikulas Patocka goto invalid_optional; 215448debafeSMikulas Patocka if (autocommit_msecs > 3600000) 215548debafeSMikulas Patocka goto invalid_optional; 215648debafeSMikulas Patocka wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); 215748debafeSMikulas Patocka wc->autocommit_time_set = true; 21583923d485SMikulas Patocka } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { 21593923d485SMikulas Patocka unsigned max_age_msecs; 21603923d485SMikulas Patocka string = dm_shift_arg(&as), opt_params--; 21613923d485SMikulas Patocka if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1) 21623923d485SMikulas Patocka goto invalid_optional; 
21633923d485SMikulas Patocka if (max_age_msecs > 86400000) 21643923d485SMikulas Patocka goto invalid_optional; 21653923d485SMikulas Patocka wc->max_age = msecs_to_jiffies(max_age_msecs); 216693de44ebSMikulas Patocka } else if (!strcasecmp(string, "cleaner")) { 216793de44ebSMikulas Patocka wc->cleaner = true; 216848debafeSMikulas Patocka } else if (!strcasecmp(string, "fua")) { 216948debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 217048debafeSMikulas Patocka wc->writeback_fua = true; 217148debafeSMikulas Patocka wc->writeback_fua_set = true; 217248debafeSMikulas Patocka } else goto invalid_optional; 217348debafeSMikulas Patocka } else if (!strcasecmp(string, "nofua")) { 217448debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) { 217548debafeSMikulas Patocka wc->writeback_fua = false; 217648debafeSMikulas Patocka wc->writeback_fua_set = true; 217748debafeSMikulas Patocka } else goto invalid_optional; 217848debafeSMikulas Patocka } else { 217948debafeSMikulas Patocka invalid_optional: 218048debafeSMikulas Patocka r = -EINVAL; 218148debafeSMikulas Patocka ti->error = "Invalid optional argument"; 218248debafeSMikulas Patocka goto bad; 218348debafeSMikulas Patocka } 218448debafeSMikulas Patocka } 218548debafeSMikulas Patocka 218648debafeSMikulas Patocka if (high_wm_percent < low_wm_percent) { 218748debafeSMikulas Patocka r = -EINVAL; 218848debafeSMikulas Patocka ti->error = "High watermark must be greater than or equal to low watermark"; 218948debafeSMikulas Patocka goto bad; 219048debafeSMikulas Patocka } 219148debafeSMikulas Patocka 2192d284f824SMikulas Patocka if (WC_MODE_PMEM(wc)) { 2193d284f824SMikulas Patocka r = persistent_memory_claim(wc); 2194d284f824SMikulas Patocka if (r) { 2195d284f824SMikulas Patocka ti->error = "Unable to map persistent memory for cache"; 2196d284f824SMikulas Patocka goto bad; 2197d284f824SMikulas Patocka } 2198d284f824SMikulas Patocka } else { 219948debafeSMikulas Patocka struct dm_io_region region; 220048debafeSMikulas Patocka struct 
dm_io_request req; 220148debafeSMikulas Patocka size_t n_blocks, n_metadata_blocks; 220248debafeSMikulas Patocka uint64_t n_bitmap_bits; 220348debafeSMikulas Patocka 2204d284f824SMikulas Patocka wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT; 2205d284f824SMikulas Patocka 220648debafeSMikulas Patocka bio_list_init(&wc->flush_list); 220748debafeSMikulas Patocka wc->flush_thread = kthread_create(writecache_flush_thread, wc, "dm_writecache_flush"); 220848debafeSMikulas Patocka if (IS_ERR(wc->flush_thread)) { 220948debafeSMikulas Patocka r = PTR_ERR(wc->flush_thread); 221048debafeSMikulas Patocka wc->flush_thread = NULL; 2211e8ea141aSShenghui Wang ti->error = "Couldn't spawn flush thread"; 221248debafeSMikulas Patocka goto bad; 221348debafeSMikulas Patocka } 221448debafeSMikulas Patocka wake_up_process(wc->flush_thread); 221548debafeSMikulas Patocka 221648debafeSMikulas Patocka r = calculate_memory_size(wc->memory_map_size, wc->block_size, 221748debafeSMikulas Patocka &n_blocks, &n_metadata_blocks); 221848debafeSMikulas Patocka if (r) { 221948debafeSMikulas Patocka ti->error = "Invalid device size"; 222048debafeSMikulas Patocka goto bad; 222148debafeSMikulas Patocka } 222248debafeSMikulas Patocka 222348debafeSMikulas Patocka n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + 222448debafeSMikulas Patocka BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY; 222548debafeSMikulas Patocka /* this is limitation of test_bit functions */ 222648debafeSMikulas Patocka if (n_bitmap_bits > 1U << 31) { 222748debafeSMikulas Patocka r = -EFBIG; 222848debafeSMikulas Patocka ti->error = "Invalid device size"; 222948debafeSMikulas Patocka goto bad; 223048debafeSMikulas Patocka } 223148debafeSMikulas Patocka 223248debafeSMikulas Patocka wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits); 223348debafeSMikulas Patocka if (!wc->memory_map) { 223448debafeSMikulas Patocka r = -ENOMEM; 223548debafeSMikulas Patocka ti->error = "Unable to allocate 
memory for metadata"; 223648debafeSMikulas Patocka goto bad; 223748debafeSMikulas Patocka } 223848debafeSMikulas Patocka 223948debafeSMikulas Patocka wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle); 224048debafeSMikulas Patocka if (IS_ERR(wc->dm_kcopyd)) { 224148debafeSMikulas Patocka r = PTR_ERR(wc->dm_kcopyd); 224248debafeSMikulas Patocka ti->error = "Unable to allocate dm-kcopyd client"; 224348debafeSMikulas Patocka wc->dm_kcopyd = NULL; 224448debafeSMikulas Patocka goto bad; 224548debafeSMikulas Patocka } 224648debafeSMikulas Patocka 224748debafeSMikulas Patocka wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT); 224848debafeSMikulas Patocka wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / 224948debafeSMikulas Patocka BITS_PER_LONG * sizeof(unsigned long); 225048debafeSMikulas Patocka wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size); 225148debafeSMikulas Patocka if (!wc->dirty_bitmap) { 225248debafeSMikulas Patocka r = -ENOMEM; 225348debafeSMikulas Patocka ti->error = "Unable to allocate dirty bitmap"; 225448debafeSMikulas Patocka goto bad; 225548debafeSMikulas Patocka } 225648debafeSMikulas Patocka 225748debafeSMikulas Patocka region.bdev = wc->ssd_dev->bdev; 2258d284f824SMikulas Patocka region.sector = wc->start_sector; 225948debafeSMikulas Patocka region.count = wc->metadata_sectors; 226048debafeSMikulas Patocka req.bi_op = REQ_OP_READ; 226148debafeSMikulas Patocka req.bi_op_flags = REQ_SYNC; 226248debafeSMikulas Patocka req.mem.type = DM_IO_VMA; 226348debafeSMikulas Patocka req.mem.ptr.vma = (char *)wc->memory_map; 226448debafeSMikulas Patocka req.client = wc->dm_io; 226548debafeSMikulas Patocka req.notify.fn = NULL; 226648debafeSMikulas Patocka 226748debafeSMikulas Patocka r = dm_io(&req, 1, ®ion, NULL); 226848debafeSMikulas Patocka if (r) { 226948debafeSMikulas Patocka ti->error = "Unable to read metadata"; 227048debafeSMikulas Patocka goto bad; 227148debafeSMikulas Patocka } 227248debafeSMikulas 
Patocka } 227348debafeSMikulas Patocka 227448debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 227548debafeSMikulas Patocka if (r) { 227648debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 227748debafeSMikulas Patocka goto bad; 227848debafeSMikulas Patocka } 227948debafeSMikulas Patocka if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) { 228048debafeSMikulas Patocka r = init_memory(wc); 228148debafeSMikulas Patocka if (r) { 228248debafeSMikulas Patocka ti->error = "Unable to initialize device"; 228348debafeSMikulas Patocka goto bad; 228448debafeSMikulas Patocka } 228548debafeSMikulas Patocka r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); 228648debafeSMikulas Patocka if (r) { 228748debafeSMikulas Patocka ti->error = "Hardware memory error when reading superblock"; 228848debafeSMikulas Patocka goto bad; 228948debafeSMikulas Patocka } 229048debafeSMikulas Patocka } 229148debafeSMikulas Patocka 229248debafeSMikulas Patocka if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) { 229348debafeSMikulas Patocka ti->error = "Invalid magic in the superblock"; 229448debafeSMikulas Patocka r = -EINVAL; 229548debafeSMikulas Patocka goto bad; 229648debafeSMikulas Patocka } 229748debafeSMikulas Patocka 229848debafeSMikulas Patocka if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) { 229948debafeSMikulas Patocka ti->error = "Invalid version in the superblock"; 230048debafeSMikulas Patocka r = -EINVAL; 230148debafeSMikulas Patocka goto bad; 230248debafeSMikulas Patocka } 230348debafeSMikulas Patocka 230448debafeSMikulas Patocka if (le32_to_cpu(s.block_size) != wc->block_size) { 230548debafeSMikulas Patocka ti->error = "Block size does not match superblock"; 230648debafeSMikulas Patocka r = -EINVAL; 230748debafeSMikulas Patocka goto bad; 230848debafeSMikulas Patocka } 230948debafeSMikulas Patocka 231048debafeSMikulas Patocka wc->n_blocks = le64_to_cpu(s.n_blocks); 
231148debafeSMikulas Patocka 231248debafeSMikulas Patocka offset = wc->n_blocks * sizeof(struct wc_memory_entry); 231348debafeSMikulas Patocka if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) { 231448debafeSMikulas Patocka overflow: 231548debafeSMikulas Patocka ti->error = "Overflow in size calculation"; 231648debafeSMikulas Patocka r = -EINVAL; 231748debafeSMikulas Patocka goto bad; 231848debafeSMikulas Patocka } 231948debafeSMikulas Patocka offset += sizeof(struct wc_memory_superblock); 232048debafeSMikulas Patocka if (offset < sizeof(struct wc_memory_superblock)) 232148debafeSMikulas Patocka goto overflow; 232248debafeSMikulas Patocka offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1); 232348debafeSMikulas Patocka data_size = wc->n_blocks * (size_t)wc->block_size; 232448debafeSMikulas Patocka if (!offset || (data_size / wc->block_size != wc->n_blocks) || 232548debafeSMikulas Patocka (offset + data_size < offset)) 232648debafeSMikulas Patocka goto overflow; 232748debafeSMikulas Patocka if (offset + data_size > wc->memory_map_size) { 232848debafeSMikulas Patocka ti->error = "Memory area is too small"; 232948debafeSMikulas Patocka r = -EINVAL; 233048debafeSMikulas Patocka goto bad; 233148debafeSMikulas Patocka } 233248debafeSMikulas Patocka 233348debafeSMikulas Patocka wc->metadata_sectors = offset >> SECTOR_SHIFT; 233448debafeSMikulas Patocka wc->block_start = (char *)sb(wc) + offset; 233548debafeSMikulas Patocka 233648debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - high_wm_percent); 233748debafeSMikulas Patocka x += 50; 233848debafeSMikulas Patocka do_div(x, 100); 233948debafeSMikulas Patocka wc->freelist_high_watermark = x; 234048debafeSMikulas Patocka x = (uint64_t)wc->n_blocks * (100 - low_wm_percent); 234148debafeSMikulas Patocka x += 50; 234248debafeSMikulas Patocka do_div(x, 100); 234348debafeSMikulas Patocka wc->freelist_low_watermark = x; 234448debafeSMikulas Patocka 234593de44ebSMikulas Patocka 
if (wc->cleaner) 234693de44ebSMikulas Patocka activate_cleaner(wc); 234793de44ebSMikulas Patocka 234848debafeSMikulas Patocka r = writecache_alloc_entries(wc); 234948debafeSMikulas Patocka if (r) { 235048debafeSMikulas Patocka ti->error = "Cannot allocate memory"; 235148debafeSMikulas Patocka goto bad; 235248debafeSMikulas Patocka } 235348debafeSMikulas Patocka 235448debafeSMikulas Patocka ti->num_flush_bios = 1; 235548debafeSMikulas Patocka ti->flush_supported = true; 235648debafeSMikulas Patocka ti->num_discard_bios = 1; 235748debafeSMikulas Patocka 235848debafeSMikulas Patocka if (WC_MODE_PMEM(wc)) 235948debafeSMikulas Patocka persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 236048debafeSMikulas Patocka 236148debafeSMikulas Patocka return 0; 236248debafeSMikulas Patocka 236348debafeSMikulas Patocka bad_arguments: 236448debafeSMikulas Patocka r = -EINVAL; 236548debafeSMikulas Patocka ti->error = "Bad arguments"; 236648debafeSMikulas Patocka bad: 236748debafeSMikulas Patocka writecache_dtr(ti); 236848debafeSMikulas Patocka return r; 236948debafeSMikulas Patocka } 237048debafeSMikulas Patocka 237148debafeSMikulas Patocka static void writecache_status(struct dm_target *ti, status_type_t type, 237248debafeSMikulas Patocka unsigned status_flags, char *result, unsigned maxlen) 237348debafeSMikulas Patocka { 237448debafeSMikulas Patocka struct dm_writecache *wc = ti->private; 237548debafeSMikulas Patocka unsigned extra_args; 237648debafeSMikulas Patocka unsigned sz = 0; 237748debafeSMikulas Patocka uint64_t x; 237848debafeSMikulas Patocka 237948debafeSMikulas Patocka switch (type) { 238048debafeSMikulas Patocka case STATUSTYPE_INFO: 238148debafeSMikulas Patocka DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), 238248debafeSMikulas Patocka (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size, 238348debafeSMikulas Patocka (unsigned long long)wc->writeback_size); 238448debafeSMikulas Patocka break; 238548debafeSMikulas Patocka 
case STATUSTYPE_TABLE: 238648debafeSMikulas Patocka DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', 238748debafeSMikulas Patocka wc->dev->name, wc->ssd_dev->name, wc->block_size); 238848debafeSMikulas Patocka extra_args = 0; 23899ff07e7dSMikulas Patocka if (wc->start_sector) 23909ff07e7dSMikulas Patocka extra_args += 2; 239193de44ebSMikulas Patocka if (wc->high_wm_percent_set && !wc->cleaner) 239248debafeSMikulas Patocka extra_args += 2; 239393de44ebSMikulas Patocka if (wc->low_wm_percent_set && !wc->cleaner) 239448debafeSMikulas Patocka extra_args += 2; 239548debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 239648debafeSMikulas Patocka extra_args += 2; 239748debafeSMikulas Patocka if (wc->autocommit_blocks_set) 239848debafeSMikulas Patocka extra_args += 2; 239948debafeSMikulas Patocka if (wc->autocommit_time_set) 240048debafeSMikulas Patocka extra_args += 2; 240193de44ebSMikulas Patocka if (wc->cleaner) 240293de44ebSMikulas Patocka extra_args++; 240348debafeSMikulas Patocka if (wc->writeback_fua_set) 240448debafeSMikulas Patocka extra_args++; 240548debafeSMikulas Patocka 240648debafeSMikulas Patocka DMEMIT("%u", extra_args); 24079ff07e7dSMikulas Patocka if (wc->start_sector) 24089ff07e7dSMikulas Patocka DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); 240993de44ebSMikulas Patocka if (wc->high_wm_percent_set && !wc->cleaner) { 241048debafeSMikulas Patocka x = (uint64_t)wc->freelist_high_watermark * 100; 241148debafeSMikulas Patocka x += wc->n_blocks / 2; 241248debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 241348debafeSMikulas Patocka DMEMIT(" high_watermark %u", 100 - (unsigned)x); 241448debafeSMikulas Patocka } 241593de44ebSMikulas Patocka if (wc->low_wm_percent_set && !wc->cleaner) { 241648debafeSMikulas Patocka x = (uint64_t)wc->freelist_low_watermark * 100; 241748debafeSMikulas Patocka x += wc->n_blocks / 2; 241848debafeSMikulas Patocka do_div(x, (size_t)wc->n_blocks); 241948debafeSMikulas Patocka DMEMIT(" low_watermark 
%u", 100 - (unsigned)x); 242048debafeSMikulas Patocka } 242148debafeSMikulas Patocka if (wc->max_writeback_jobs_set) 242248debafeSMikulas Patocka DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); 242348debafeSMikulas Patocka if (wc->autocommit_blocks_set) 242448debafeSMikulas Patocka DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); 242548debafeSMikulas Patocka if (wc->autocommit_time_set) 242648debafeSMikulas Patocka DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); 24273923d485SMikulas Patocka if (wc->max_age != MAX_AGE_UNSPECIFIED) 24283923d485SMikulas Patocka DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age)); 242993de44ebSMikulas Patocka if (wc->cleaner) 243093de44ebSMikulas Patocka DMEMIT(" cleaner"); 243148debafeSMikulas Patocka if (wc->writeback_fua_set) 243248debafeSMikulas Patocka DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); 243348debafeSMikulas Patocka break; 243448debafeSMikulas Patocka } 243548debafeSMikulas Patocka } 243648debafeSMikulas Patocka 243748debafeSMikulas Patocka static struct target_type writecache_target = { 243848debafeSMikulas Patocka .name = "writecache", 243993de44ebSMikulas Patocka .version = {1, 3, 0}, 244048debafeSMikulas Patocka .module = THIS_MODULE, 244148debafeSMikulas Patocka .ctr = writecache_ctr, 244248debafeSMikulas Patocka .dtr = writecache_dtr, 244348debafeSMikulas Patocka .status = writecache_status, 244448debafeSMikulas Patocka .postsuspend = writecache_suspend, 244548debafeSMikulas Patocka .resume = writecache_resume, 244648debafeSMikulas Patocka .message = writecache_message, 244748debafeSMikulas Patocka .map = writecache_map, 244848debafeSMikulas Patocka .end_io = writecache_end_io, 244948debafeSMikulas Patocka .iterate_devices = writecache_iterate_devices, 245048debafeSMikulas Patocka .io_hints = writecache_io_hints, 245148debafeSMikulas Patocka }; 245248debafeSMikulas Patocka 245348debafeSMikulas Patocka static int __init dm_writecache_init(void) 245448debafeSMikulas Patocka { 
245548debafeSMikulas Patocka int r; 245648debafeSMikulas Patocka 245748debafeSMikulas Patocka r = dm_register_target(&writecache_target); 245848debafeSMikulas Patocka if (r < 0) { 245948debafeSMikulas Patocka DMERR("register failed %d", r); 246048debafeSMikulas Patocka return r; 246148debafeSMikulas Patocka } 246248debafeSMikulas Patocka 246348debafeSMikulas Patocka return 0; 246448debafeSMikulas Patocka } 246548debafeSMikulas Patocka 246648debafeSMikulas Patocka static void __exit dm_writecache_exit(void) 246748debafeSMikulas Patocka { 246848debafeSMikulas Patocka dm_unregister_target(&writecache_target); 246948debafeSMikulas Patocka } 247048debafeSMikulas Patocka 247148debafeSMikulas Patocka module_init(dm_writecache_init); 247248debafeSMikulas Patocka module_exit(dm_writecache_exit); 247348debafeSMikulas Patocka 247448debafeSMikulas Patocka MODULE_DESCRIPTION(DM_NAME " writecache target"); 247548debafeSMikulas Patocka MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); 247648debafeSMikulas Patocka MODULE_LICENSE("GPL"); 2477