1 /* 2 * drivers/base/memory.c - basic Memory class support 3 * 4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 5 * Dave Hansen <haveblue@us.ibm.com> 6 * 7 * This file provides the necessary infrastructure to represent 8 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 9 * All arch-independent code that assumes MEMORY_HOTPLUG requires 10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 11 */ 12 13 #include <linux/sysdev.h> 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/kobject.h> 21 #include <linux/memory_hotplug.h> 22 #include <linux/mm.h> 23 #include <linux/mutex.h> 24 #include <asm/atomic.h> 25 #include <asm/uaccess.h> 26 27 #define MEMORY_CLASS_NAME "memory" 28 29 static struct sysdev_class memory_sysdev_class = { 30 .name = MEMORY_CLASS_NAME, 31 }; 32 33 static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) 34 { 35 return MEMORY_CLASS_NAME; 36 } 37 38 static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env) 39 { 40 int retval = 0; 41 42 return retval; 43 } 44 45 static struct kset_uevent_ops memory_uevent_ops = { 46 .name = memory_uevent_name, 47 .uevent = memory_uevent, 48 }; 49 50 static BLOCKING_NOTIFIER_HEAD(memory_chain); 51 52 int register_memory_notifier(struct notifier_block *nb) 53 { 54 return blocking_notifier_chain_register(&memory_chain, nb); 55 } 56 57 void unregister_memory_notifier(struct notifier_block *nb) 58 { 59 blocking_notifier_chain_unregister(&memory_chain, nb); 60 } 61 62 /* 63 * register_memory - Setup a sysfs device for a memory block 64 */ 65 static 66 int register_memory(struct memory_block *memory, struct mem_section *section) 67 { 68 int error; 69 70 memory->sysdev.cls = &memory_sysdev_class; 71 memory->sysdev.id = __section_nr(section); 72 73 error = sysdev_register(&memory->sysdev); 74 return error; 75 } 76 77 static void 78 unregister_memory(struct memory_block *memory, struct mem_section *section) 79 { 80 BUG_ON(memory->sysdev.cls != &memory_sysdev_class); 81 BUG_ON(memory->sysdev.id != __section_nr(section)); 82 83 /* drop the ref. we got in remove_memory_block() */ 84 kobject_put(&memory->sysdev.kobj); 85 sysdev_unregister(&memory->sysdev); 86 } 87 88 /* 89 * use this as the physical section index that this memsection 90 * uses. 91 */ 92 93 static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) 94 { 95 struct memory_block *mem = 96 container_of(dev, struct memory_block, sysdev); 97 return sprintf(buf, "%08lx\n", mem->phys_index); 98 } 99 100 /* 101 * online, offline, going offline, etc. 102 */ 103 static ssize_t show_mem_state(struct sys_device *dev, char *buf) 104 { 105 struct memory_block *mem = 106 container_of(dev, struct memory_block, sysdev); 107 ssize_t len = 0; 108 109 /* 110 * We can probably put these states in a nice little array 111 * so that they're not open-coded 112 */ 113 switch (mem->state) { 114 case MEM_ONLINE: 115 len = sprintf(buf, "online\n"); 116 break; 117 case MEM_OFFLINE: 118 len = sprintf(buf, "offline\n"); 119 break; 120 case MEM_GOING_OFFLINE: 121 len = sprintf(buf, "going-offline\n"); 122 break; 123 default: 124 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 125 mem->state); 126 WARN_ON(1); 127 break; 128 } 129 130 return len; 131 } 132 133 int memory_notify(unsigned long val, void *v) 134 { 135 return blocking_notifier_call_chain(&memory_chain, val, v); 136 } 137 138 /* 139 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 140 * OK to have direct references to sparsemem variables in here. 141 */ 142 static int 143 memory_block_action(struct memory_block *mem, unsigned long action) 144 { 145 int i; 146 unsigned long psection; 147 unsigned long start_pfn, start_paddr; 148 struct page *first_page; 149 int ret; 150 int old_state = mem->state; 151 152 psection = mem->phys_index; 153 first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); 154 155 /* 156 * The probe routines leave the pages reserved, just 157 * as the bootmem code does. Make sure they're still 158 * that way. 159 */ 160 if (action == MEM_ONLINE) { 161 for (i = 0; i < PAGES_PER_SECTION; i++) { 162 if (PageReserved(first_page+i)) 163 continue; 164 165 printk(KERN_WARNING "section number %ld page number %d " 166 "not reserved, was it already online? \n", 167 psection, i); 168 return -EBUSY; 169 } 170 } 171 172 switch (action) { 173 case MEM_ONLINE: 174 start_pfn = page_to_pfn(first_page); 175 ret = online_pages(start_pfn, PAGES_PER_SECTION); 176 break; 177 case MEM_OFFLINE: 178 mem->state = MEM_GOING_OFFLINE; 179 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 180 ret = remove_memory(start_paddr, 181 PAGES_PER_SECTION << PAGE_SHIFT); 182 if (ret) { 183 mem->state = old_state; 184 break; 185 } 186 break; 187 default: 188 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 189 __func__, mem, action, action); 190 WARN_ON(1); 191 ret = -EINVAL; 192 } 193 194 return ret; 195 } 196 197 static int memory_block_change_state(struct memory_block *mem, 198 unsigned long to_state, unsigned long from_state_req) 199 { 200 int ret = 0; 201 mutex_lock(&mem->state_mutex); 202 203 if (mem->state != from_state_req) { 204 ret = -EINVAL; 205 goto out; 206 } 207 208 ret = memory_block_action(mem, to_state); 209 if (!ret) 210 mem->state = to_state; 211 212 out: 213 mutex_unlock(&mem->state_mutex); 214 return ret; 215 } 216 217 static ssize_t 218 store_mem_state(struct sys_device *dev, const char *buf, size_t count) 219 { 220 struct memory_block *mem; 221 unsigned int phys_section_nr; 222 int ret = -EINVAL; 223 224 mem = container_of(dev, struct memory_block, sysdev); 225 phys_section_nr = mem->phys_index; 226 227 if (!present_section_nr(phys_section_nr)) 228 goto out; 229 230 if (!strncmp(buf, "online", min((int)count, 6))) 231 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 232 else if(!strncmp(buf, "offline", min((int)count, 7))) 233 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 234 out: 235 if (ret) 236 return ret; 237 return count; 238 } 239 240 /* 241 * phys_device is a bad name for this. What I really want 242 * is a way to differentiate between memory ranges that 243 * are part of physical devices that constitute 244 * a complete removable unit or fru. 245 * i.e. do these ranges belong to the same physical device, 246 * s.t. if I offline all of these sections I can then 247 * remove the physical device? 248 */ 249 static ssize_t show_phys_device(struct sys_device *dev, char *buf) 250 { 251 struct memory_block *mem = 252 container_of(dev, struct memory_block, sysdev); 253 return sprintf(buf, "%d\n", mem->phys_device); 254 } 255 256 static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); 257 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); 258 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); 259 260 #define mem_create_simple_file(mem, attr_name) \ 261 sysdev_create_file(&mem->sysdev, &attr_##attr_name) 262 #define mem_remove_simple_file(mem, attr_name) \ 263 sysdev_remove_file(&mem->sysdev, &attr_##attr_name) 264 265 /* 266 * Block size attribute stuff 267 */ 268 static ssize_t 269 print_block_size(struct class *class, char *buf) 270 { 271 return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); 272 } 273 274 static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); 275 276 static int block_size_init(void) 277 { 278 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 279 &class_attr_block_size_bytes.attr); 280 } 281 282 /* 283 * Some architectures will have custom drivers to do this, and 284 * will not need to do it from userspace. The fake hot-add code 285 * as well as ppc64 will do all of their discovery in userspace 286 * and will require this interface. 287 */ 288 #ifdef CONFIG_ARCH_MEMORY_PROBE 289 static ssize_t 290 memory_probe_store(struct class *class, const char *buf, size_t count) 291 { 292 u64 phys_addr; 293 int nid; 294 int ret; 295 296 phys_addr = simple_strtoull(buf, NULL, 0); 297 298 nid = memory_add_physaddr_to_nid(phys_addr); 299 ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); 300 301 if (ret) 302 count = ret; 303 304 return count; 305 } 306 static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); 307 308 static int memory_probe_init(void) 309 { 310 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 311 &class_attr_probe.attr); 312 } 313 #else 314 static inline int memory_probe_init(void) 315 { 316 return 0; 317 } 318 #endif 319 320 /* 321 * Note that phys_device is optional. It is here to allow for 322 * differentiation between which *physical* devices each 323 * section belongs to... 324 */ 325 326 static int add_memory_block(unsigned long node_id, struct mem_section *section, 327 unsigned long state, int phys_device) 328 { 329 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 330 int ret = 0; 331 332 if (!mem) 333 return -ENOMEM; 334 335 mem->phys_index = __section_nr(section); 336 mem->state = state; 337 mutex_init(&mem->state_mutex); 338 mem->phys_device = phys_device; 339 340 ret = register_memory(mem, section); 341 if (!ret) 342 ret = mem_create_simple_file(mem, phys_index); 343 if (!ret) 344 ret = mem_create_simple_file(mem, state); 345 if (!ret) 346 ret = mem_create_simple_file(mem, phys_device); 347 348 return ret; 349 } 350 351 /* 352 * For now, we have a linear search to go find the appropriate 353 * memory_block corresponding to a particular phys_index. If 354 * this gets to be a real problem, we can always use a radix 355 * tree or something here. 356 * 357 * This could be made generic for all sysdev classes. 358 */ 359 static struct memory_block *find_memory_block(struct mem_section *section) 360 { 361 struct kobject *kobj; 362 struct sys_device *sysdev; 363 struct memory_block *mem; 364 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; 365 366 /* 367 * This only works because we know that section == sysdev->id 368 * slightly redundant with sysdev_register() 369 */ 370 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); 371 372 kobj = kset_find_obj(&memory_sysdev_class.kset, name); 373 if (!kobj) 374 return NULL; 375 376 sysdev = container_of(kobj, struct sys_device, kobj); 377 mem = container_of(sysdev, struct memory_block, sysdev); 378 379 return mem; 380 } 381 382 int remove_memory_block(unsigned long node_id, struct mem_section *section, 383 int phys_device) 384 { 385 struct memory_block *mem; 386 387 mem = find_memory_block(section); 388 mem_remove_simple_file(mem, phys_index); 389 mem_remove_simple_file(mem, state); 390 mem_remove_simple_file(mem, phys_device); 391 unregister_memory(mem, section); 392 393 return 0; 394 } 395 396 /* 397 * need an interface for the VM to add new memory regions, 398 * but without onlining it. 399 */ 400 int register_new_memory(struct mem_section *section) 401 { 402 return add_memory_block(0, section, MEM_OFFLINE, 0); 403 } 404 405 int unregister_memory_section(struct mem_section *section) 406 { 407 if (!present_section(section)) 408 return -EINVAL; 409 410 return remove_memory_block(0, section, 0); 411 } 412 413 /* 414 * Initialize the sysfs support for memory devices... 415 */ 416 int __init memory_dev_init(void) 417 { 418 unsigned int i; 419 int ret; 420 int err; 421 422 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 423 ret = sysdev_class_register(&memory_sysdev_class); 424 if (ret) 425 goto out; 426 427 /* 428 * Create entries for memory sections that were found 429 * during boot and have been initialized 430 */ 431 for (i = 0; i < NR_MEM_SECTIONS; i++) { 432 if (!present_section_nr(i)) 433 continue; 434 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 435 if (!ret) 436 ret = err; 437 } 438 439 err = memory_probe_init(); 440 if (!ret) 441 ret = err; 442 err = block_size_init(); 443 if (!ret) 444 ret = err; 445 out: 446 if (ret) 447 printk(KERN_ERR "%s() failed: %d\n", __func__, ret); 448 return ret; 449 } 450