1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */ 3 #include <linux/memremap.h> 4 #include <linux/pagemap.h> 5 #include <linux/memory.h> 6 #include <linux/module.h> 7 #include <linux/device.h> 8 #include <linux/pfn_t.h> 9 #include <linux/slab.h> 10 #include <linux/dax.h> 11 #include <linux/fs.h> 12 #include <linux/mm.h> 13 #include <linux/mman.h> 14 #include "dax-private.h" 15 #include "bus.h" 16 17 /* Memory resource name used for add_memory_driver_managed(). */ 18 static const char *kmem_name; 19 /* Set if any memory will remain added when the driver will be unloaded. */ 20 static bool any_hotremove_failed; 21 22 static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) 23 { 24 struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 25 struct range *range = &dax_range->range; 26 27 /* memory-block align the hotplug range */ 28 r->start = ALIGN(range->start, memory_block_size_bytes()); 29 r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1; 30 if (r->start >= r->end) { 31 r->start = range->start; 32 r->end = range->end; 33 return -ENOSPC; 34 } 35 return 0; 36 } 37 38 static int dev_dax_kmem_probe(struct dev_dax *dev_dax) 39 { 40 struct device *dev = &dev_dax->dev; 41 int i, mapped = 0; 42 char *res_name; 43 int numa_node; 44 45 /* 46 * Ensure good NUMA information for the persistent memory. 47 * Without this check, there is a risk that slow memory 48 * could be mixed in a node with faster memory, causing 49 * unavoidable performance issues. 50 */ 51 numa_node = dev_dax->target_node; 52 if (numa_node < 0) { 53 dev_warn(dev, "rejecting DAX region with invalid node: %d\n", 54 numa_node); 55 return -EINVAL; 56 } 57 58 res_name = kstrdup(dev_name(dev), GFP_KERNEL); 59 if (!res_name) 60 return -ENOMEM; 61 62 for (i = 0; i < dev_dax->nr_range; i++) { 63 struct resource *res; 64 struct range range; 65 int rc; 66 67 rc = dax_kmem_range(dev_dax, i, &range); 68 if (rc) { 69 dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", 70 i, range.start, range.end); 71 continue; 72 } 73 74 /* Region is permanently reserved if hotremove fails. */ 75 res = request_mem_region(range.start, range_len(&range), res_name); 76 if (!res) { 77 dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n", 78 i, range.start, range.end); 79 /* 80 * Once some memory has been onlined we can't 81 * assume that it can be un-onlined safely. 82 */ 83 if (mapped) 84 continue; 85 kfree(res_name); 86 return -EBUSY; 87 } 88 89 /* 90 * Set flags appropriate for System RAM. Leave ..._BUSY clear 91 * so that add_memory() can add a child resource. Do not 92 * inherit flags from the parent since it may set new flags 93 * unknown to us that will break add_memory() below. 94 */ 95 res->flags = IORESOURCE_SYSTEM_RAM; 96 97 /* 98 * Ensure that future kexec'd kernels will not treat 99 * this as RAM automatically. 100 */ 101 rc = add_memory_driver_managed(numa_node, range.start, 102 range_len(&range), kmem_name); 103 104 if (rc) { 105 dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", 106 i, range.start, range.end); 107 release_mem_region(range.start, range_len(&range)); 108 if (mapped) 109 continue; 110 kfree(res_name); 111 return rc; 112 } 113 mapped++; 114 } 115 116 dev_set_drvdata(dev, res_name); 117 118 return 0; 119 } 120 121 #ifdef CONFIG_MEMORY_HOTREMOVE 122 static int dev_dax_kmem_remove(struct dev_dax *dev_dax) 123 { 124 int i, success = 0; 125 struct device *dev = &dev_dax->dev; 126 const char *res_name = dev_get_drvdata(dev); 127 128 /* 129 * We have one shot for removing memory, if some memory blocks were not 130 * offline prior to calling this function remove_memory() will fail, and 131 * there is no way to hotremove this memory until reboot because device 132 * unbind will succeed even if we return failure. 133 */ 134 for (i = 0; i < dev_dax->nr_range; i++) { 135 struct range range; 136 int rc; 137 138 rc = dax_kmem_range(dev_dax, i, &range); 139 if (rc) 140 continue; 141 142 rc = remove_memory(dev_dax->target_node, range.start, 143 range_len(&range)); 144 if (rc == 0) { 145 release_mem_region(range.start, range_len(&range)); 146 success++; 147 continue; 148 } 149 any_hotremove_failed = true; 150 dev_err(dev, 151 "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n", 152 i, range.start, range.end); 153 } 154 155 if (success >= dev_dax->nr_range) { 156 kfree(res_name); 157 dev_set_drvdata(dev, NULL); 158 } 159 160 return 0; 161 } 162 #else 163 static int dev_dax_kmem_remove(struct dev_dax *dev_dax) 164 { 165 /* 166 * Without hotremove purposely leak the request_mem_region() for the 167 * device-dax range and return '0' to ->remove() attempts. The removal 168 * of the device from the driver always succeeds, but the region is 169 * permanently pinned as reserved by the unreleased 170 * request_mem_region(). 171 */ 172 any_hotremove_failed = true; 173 return 0; 174 } 175 #endif /* CONFIG_MEMORY_HOTREMOVE */ 176 177 static struct dax_device_driver device_dax_kmem_driver = { 178 .probe = dev_dax_kmem_probe, 179 .remove = dev_dax_kmem_remove, 180 }; 181 182 static int __init dax_kmem_init(void) 183 { 184 int rc; 185 186 /* Resource name is permanently allocated if any hotremove fails. */ 187 kmem_name = kstrdup_const("System RAM (kmem)", GFP_KERNEL); 188 if (!kmem_name) 189 return -ENOMEM; 190 191 rc = dax_driver_register(&device_dax_kmem_driver); 192 if (rc) 193 kfree_const(kmem_name); 194 return rc; 195 } 196 197 static void __exit dax_kmem_exit(void) 198 { 199 dax_driver_unregister(&device_dax_kmem_driver); 200 if (!any_hotremove_failed) 201 kfree_const(kmem_name); 202 } 203 204 MODULE_AUTHOR("Intel Corporation"); 205 MODULE_LICENSE("GPL v2"); 206 module_init(dax_kmem_init); 207 module_exit(dax_kmem_exit); 208 MODULE_ALIAS_DAX_DEVICE(0); 209