1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory hotplug support via sclp 4 * 5 * Copyright IBM Corp. 2025 6 */ 7 8 #define pr_fmt(fmt) "sclp_mem: " fmt 9 10 #include <linux/cpufeature.h> 11 #include <linux/container_of.h> 12 #include <linux/err.h> 13 #include <linux/errno.h> 14 #include <linux/init.h> 15 #include <linux/kobject.h> 16 #include <linux/kstrtox.h> 17 #include <linux/memory.h> 18 #include <linux/memory_hotplug.h> 19 #include <linux/mm.h> 20 #include <linux/mmzone.h> 21 #include <linux/slab.h> 22 #include <asm/facility.h> 23 #include <asm/page.h> 24 #include <asm/page-states.h> 25 #include <asm/sclp.h> 26 27 #include "sclp.h" 28 29 #define SCLP_CMDW_ASSIGN_STORAGE 0x000d0001 30 #define SCLP_CMDW_UNASSIGN_STORAGE 0x000c0001 31 32 static LIST_HEAD(sclp_mem_list); 33 static u8 sclp_max_storage_id; 34 static DECLARE_BITMAP(sclp_storage_ids, 256); 35 36 struct memory_increment { 37 struct list_head list; 38 u16 rn; 39 int standby; 40 }; 41 42 struct sclp_mem { 43 struct kobject kobj; 44 unsigned int id; 45 unsigned int memmap_on_memory; 46 unsigned int config; 47 #ifdef CONFIG_KASAN 48 unsigned int early_shadow_mapped; 49 #endif 50 }; 51 52 struct sclp_mem_arg { 53 struct sclp_mem *sclp_mems; 54 struct kset *kset; 55 }; 56 57 struct assign_storage_sccb { 58 struct sccb_header header; 59 u16 rn; 60 } __packed; 61 62 struct attach_storage_sccb { 63 struct sccb_header header; 64 u16 :16; 65 u16 assigned; 66 u32 :32; 67 u32 entries[]; 68 } __packed; 69 70 int arch_get_memory_phys_device(unsigned long start_pfn) 71 { 72 if (!sclp.rzm) 73 return 0; 74 return PFN_PHYS(start_pfn) >> ilog2(sclp.rzm); 75 } 76 77 static unsigned long rn2addr(u16 rn) 78 { 79 return (unsigned long)(rn - 1) * sclp.rzm; 80 } 81 82 static int do_assign_storage(sclp_cmdw_t cmd, u16 rn) 83 { 84 struct assign_storage_sccb *sccb; 85 int rc; 86 87 sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); 88 if (!sccb) 89 return -ENOMEM; 90 sccb->header.length = PAGE_SIZE; 91 sccb->rn = rn; 92 rc = sclp_sync_request_timeout(cmd, sccb, SCLP_QUEUE_INTERVAL); 93 if (rc) 94 goto out; 95 switch (sccb->header.response_code) { 96 case 0x0020: 97 case 0x0120: 98 break; 99 default: 100 pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n", 101 cmd, sccb->header.response_code, rn); 102 rc = -EIO; 103 break; 104 } 105 out: 106 free_page((unsigned long)sccb); 107 return rc; 108 } 109 110 static int sclp_assign_storage(u16 rn) 111 { 112 unsigned long start; 113 int rc; 114 115 rc = do_assign_storage(SCLP_CMDW_ASSIGN_STORAGE, rn); 116 if (rc) 117 return rc; 118 start = rn2addr(rn); 119 storage_key_init_range(start, start + sclp.rzm); 120 return 0; 121 } 122 123 static int sclp_unassign_storage(u16 rn) 124 { 125 return do_assign_storage(SCLP_CMDW_UNASSIGN_STORAGE, rn); 126 } 127 128 static int sclp_attach_storage(u8 id) 129 { 130 struct attach_storage_sccb *sccb; 131 int rc, i; 132 133 sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); 134 if (!sccb) 135 return -ENOMEM; 136 sccb->header.length = PAGE_SIZE; 137 sccb->header.function_code = 0x40; 138 rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb, 139 SCLP_QUEUE_INTERVAL); 140 if (rc) 141 goto out; 142 switch (sccb->header.response_code) { 143 case 0x0020: 144 set_bit(id, sclp_storage_ids); 145 for (i = 0; i < sccb->assigned; i++) { 146 if (sccb->entries[i]) 147 sclp_unassign_storage(sccb->entries[i] >> 16); 148 } 149 break; 150 default: 151 rc = -EIO; 152 break; 153 } 154 out: 155 free_page((unsigned long)sccb); 156 return rc; 157 } 158 159 static int sclp_mem_change_state(unsigned long start, unsigned long size, 160 int online) 161 { 162 struct memory_increment *incr; 163 unsigned long istart; 164 int rc = 0; 165 166 list_for_each_entry(incr, &sclp_mem_list, list) { 167 istart = rn2addr(incr->rn); 168 if (start + size - 1 < istart) 169 break; 170 if (start > istart + sclp.rzm - 1) 171 continue; 172 if (online) 173 rc |= sclp_assign_storage(incr->rn); 174 else 175 sclp_unassign_storage(incr->rn); 176 if (rc == 0) 177 incr->standby = online ? 0 : 1; 178 } 179 return rc ? -EIO : 0; 180 } 181 182 static ssize_t sclp_config_mem_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) 183 { 184 struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj); 185 186 return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->config)); 187 } 188 189 static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute *attr, 190 const char *buf, size_t count) 191 { 192 unsigned long addr, block_size; 193 struct sclp_mem *sclp_mem; 194 struct memory_block *mem; 195 unsigned char id; 196 bool value; 197 int rc; 198 199 rc = kstrtobool(buf, &value); 200 if (rc) 201 return rc; 202 sclp_mem = container_of(kobj, struct sclp_mem, kobj); 203 block_size = memory_block_size_bytes(); 204 addr = sclp_mem->id * block_size; 205 /* 206 * Hold device_hotplug_lock when adding/removing memory blocks. 207 * Additionally, also protect calls to memory_block_get() and 208 * sclp_attach_storage(). 209 */ 210 rc = lock_device_hotplug_sysfs(); 211 if (rc) 212 goto out; 213 for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1) 214 sclp_attach_storage(id); 215 if (value) { 216 if (sclp_mem->config) 217 goto out_unlock; 218 rc = sclp_mem_change_state(addr, block_size, 1); 219 if (rc) 220 goto out_unlock; 221 /* 222 * Set entire memory block CMMA state to nodat. Later, when 223 * page tables pages are allocated via __add_memory(), those 224 * regions are marked __arch_set_page_dat(). 225 */ 226 __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT); 227 rc = __add_memory(0, addr, block_size, 228 sclp_mem->memmap_on_memory ? 229 MHP_MEMMAP_ON_MEMORY : MHP_NONE); 230 if (rc) { 231 sclp_mem_change_state(addr, block_size, 0); 232 goto out_unlock; 233 } 234 mem = memory_block_get(phys_to_block_id(addr)); 235 memory_block_put(mem); 236 WRITE_ONCE(sclp_mem->config, 1); 237 } else { 238 if (!sclp_mem->config) 239 goto out_unlock; 240 mem = memory_block_get(phys_to_block_id(addr)); 241 if (mem->state != MEM_OFFLINE) { 242 memory_block_put(mem); 243 rc = -EBUSY; 244 goto out_unlock; 245 } 246 memory_block_put(mem); 247 sclp_mem_change_state(addr, block_size, 0); 248 __remove_memory(addr, block_size); 249 #ifdef CONFIG_KASAN 250 if (sclp_mem->early_shadow_mapped) { 251 unsigned long start, end; 252 253 start = (unsigned long)kasan_mem_to_shadow(__va(addr)); 254 end = start + (block_size >> KASAN_SHADOW_SCALE_SHIFT); 255 vmemmap_free(start, end, NULL); 256 sclp_mem->early_shadow_mapped = 0; 257 } 258 #endif 259 WRITE_ONCE(sclp_mem->config, 0); 260 } 261 out_unlock: 262 unlock_device_hotplug(); 263 out: 264 return rc ? rc : count; 265 } 266 267 static struct kobj_attribute sclp_config_mem_attr = 268 __ATTR(config, 0644, sclp_config_mem_show, sclp_config_mem_store); 269 270 static ssize_t sclp_memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) 271 { 272 struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj); 273 274 return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->memmap_on_memory)); 275 } 276 277 static ssize_t sclp_memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr, 278 const char *buf, size_t count) 279 { 280 struct sclp_mem *sclp_mem; 281 unsigned long block_size; 282 struct memory_block *mem; 283 bool value; 284 int rc; 285 286 rc = kstrtobool(buf, &value); 287 if (rc) 288 return rc; 289 if (value && !mhp_supports_memmap_on_memory()) 290 return -EOPNOTSUPP; 291 rc = lock_device_hotplug_sysfs(); 292 if (rc) 293 return rc; 294 block_size = memory_block_size_bytes(); 295 sclp_mem = container_of(kobj, struct sclp_mem, kobj); 296 mem = memory_block_get(phys_to_block_id(sclp_mem->id * block_size)); 297 if (!mem) { 298 WRITE_ONCE(sclp_mem->memmap_on_memory, value); 299 } else { 300 memory_block_put(mem); 301 rc = -EBUSY; 302 } 303 unlock_device_hotplug(); 304 return rc ? rc : count; 305 } 306 307 static const struct kobj_type ktype = { 308 .sysfs_ops = &kobj_sysfs_ops, 309 }; 310 311 static struct kobj_attribute sclp_memmap_attr = 312 __ATTR(memmap_on_memory, 0644, sclp_memmap_on_memory_show, sclp_memmap_on_memory_store); 313 314 static struct attribute *sclp_mem_attrs[] = { 315 &sclp_config_mem_attr.attr, 316 &sclp_memmap_attr.attr, 317 NULL, 318 }; 319 320 static struct attribute_group sclp_mem_attr_group = { 321 .attrs = sclp_mem_attrs, 322 }; 323 324 static int sclp_create_mem(struct sclp_mem *sclp_mem, struct kset *kset, 325 unsigned int id, bool config, bool memmap_on_memory) 326 { 327 int rc; 328 329 sclp_mem->memmap_on_memory = memmap_on_memory; 330 sclp_mem->config = config; 331 #ifdef CONFIG_KASAN 332 sclp_mem->early_shadow_mapped = config; 333 #endif 334 sclp_mem->id = id; 335 kobject_init(&sclp_mem->kobj, &ktype); 336 rc = kobject_add(&sclp_mem->kobj, &kset->kobj, "memory%d", id); 337 if (rc) 338 return rc; 339 return sysfs_create_group(&sclp_mem->kobj, &sclp_mem_attr_group); 340 } 341 342 static int sclp_create_configured_mem(struct memory_block *mem, void *argument) 343 { 344 struct sclp_mem *sclp_mems; 345 struct sclp_mem_arg *arg; 346 struct kset *kset; 347 unsigned int id; 348 349 id = mem->dev.id; 350 arg = (struct sclp_mem_arg *)argument; 351 sclp_mems = arg->sclp_mems; 352 kset = arg->kset; 353 return sclp_create_mem(&sclp_mems[id], kset, id, true, false); 354 } 355 356 static void __init align_to_block_size(unsigned long *start, 357 unsigned long *size, 358 unsigned long alignment) 359 { 360 unsigned long start_align, size_align; 361 362 start_align = roundup(*start, alignment); 363 size_align = rounddown(*start + *size, alignment) - start_align; 364 365 pr_info("Standby memory at 0x%lx (%luM of %luM usable)\n", 366 *start, size_align >> 20, *size >> 20); 367 *start = start_align; 368 *size = size_align; 369 } 370 371 static int __init sclp_create_standby_mems_merged(struct sclp_mem *sclp_mems, 372 struct kset *kset, u16 rn) 373 { 374 unsigned long start, size, addr, block_size; 375 static u16 first_rn, num; 376 unsigned int id; 377 int rc = 0; 378 379 if (rn && first_rn && (first_rn + num == rn)) { 380 num++; 381 return rc; 382 } 383 if (!first_rn) 384 goto skip_add; 385 start = rn2addr(first_rn); 386 size = (unsigned long)num * sclp.rzm; 387 if (start >= ident_map_size) 388 goto skip_add; 389 if (start + size > ident_map_size) 390 size = ident_map_size - start; 391 block_size = memory_block_size_bytes(); 392 align_to_block_size(&start, &size, block_size); 393 if (!size) 394 goto skip_add; 395 for (addr = start; addr < start + size; addr += block_size) { 396 id = addr / block_size; 397 rc = sclp_create_mem(&sclp_mems[id], kset, id, false, 398 mhp_supports_memmap_on_memory()); 399 if (rc) 400 break; 401 } 402 skip_add: 403 first_rn = rn; 404 num = 1; 405 return rc; 406 } 407 408 static int __init sclp_create_standby_mems(struct sclp_mem *sclp_mems, struct kset *kset) 409 { 410 struct memory_increment *incr; 411 int rc = 0; 412 413 list_for_each_entry(incr, &sclp_mem_list, list) { 414 if (incr->standby) 415 rc = sclp_create_standby_mems_merged(sclp_mems, kset, incr->rn); 416 if (rc) 417 return rc; 418 } 419 return sclp_create_standby_mems_merged(sclp_mems, kset, 0); 420 } 421 422 static int __init sclp_init_mem(void) 423 { 424 const unsigned long block_size = memory_block_size_bytes(); 425 unsigned int max_sclp_mems; 426 struct sclp_mem *sclp_mems; 427 struct sclp_mem_arg arg; 428 struct kset *kset; 429 int rc; 430 431 max_sclp_mems = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size; 432 /* Allocate memory for all blocks ahead of time. */ 433 sclp_mems = kzalloc_objs(struct sclp_mem, max_sclp_mems); 434 if (!sclp_mems) 435 return -ENOMEM; 436 kset = kset_create_and_add("memory", NULL, firmware_kobj); 437 if (!kset) 438 return -ENOMEM; 439 /* Initial memory is in the "configured" state already. */ 440 arg.sclp_mems = sclp_mems; 441 arg.kset = kset; 442 rc = for_each_memory_block(&arg, sclp_create_configured_mem); 443 if (rc) 444 return rc; 445 /* Standby memory is "deconfigured". */ 446 return sclp_create_standby_mems(sclp_mems, kset); 447 } 448 449 static void __init insert_increment(u16 rn, int standby, int assigned) 450 { 451 struct memory_increment *incr, *new_incr; 452 struct list_head *prev; 453 u16 last_rn; 454 455 new_incr = kzalloc_obj(*new_incr); 456 if (!new_incr) 457 return; 458 new_incr->rn = rn; 459 new_incr->standby = standby; 460 last_rn = 0; 461 prev = &sclp_mem_list; 462 list_for_each_entry(incr, &sclp_mem_list, list) { 463 if (assigned && incr->rn > rn) 464 break; 465 if (!assigned && incr->rn - last_rn > 1) 466 break; 467 last_rn = incr->rn; 468 prev = &incr->list; 469 } 470 if (!assigned) 471 new_incr->rn = last_rn + 1; 472 if (new_incr->rn > sclp.rnmax) { 473 kfree(new_incr); 474 return; 475 } 476 list_add(&new_incr->list, prev); 477 } 478 479 static int __init sclp_setup_memory(void) 480 { 481 struct read_storage_sccb *sccb; 482 int i, id, assigned, rc; 483 484 /* No standby memory in kdump mode */ 485 if (oldmem_data.start) 486 return 0; 487 if ((sclp.facilities & 0xe00000000000UL) != 0xe00000000000UL) 488 return 0; 489 rc = -ENOMEM; 490 sccb = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 491 if (!sccb) 492 goto out; 493 assigned = 0; 494 for (id = 0; id <= sclp_max_storage_id; id++) { 495 memset(sccb, 0, PAGE_SIZE); 496 sccb->header.length = PAGE_SIZE; 497 rc = sclp_sync_request(SCLP_CMDW_READ_STORAGE_INFO | id << 8, sccb); 498 if (rc) 499 goto out; 500 switch (sccb->header.response_code) { 501 case 0x0010: 502 set_bit(id, sclp_storage_ids); 503 for (i = 0; i < sccb->assigned; i++) { 504 if (!sccb->entries[i]) 505 continue; 506 assigned++; 507 insert_increment(sccb->entries[i] >> 16, 0, 1); 508 } 509 break; 510 case 0x0310: 511 break; 512 case 0x0410: 513 for (i = 0; i < sccb->assigned; i++) { 514 if (!sccb->entries[i]) 515 continue; 516 assigned++; 517 insert_increment(sccb->entries[i] >> 16, 1, 1); 518 } 519 break; 520 default: 521 rc = -EIO; 522 break; 523 } 524 if (!rc) 525 sclp_max_storage_id = sccb->max_id; 526 } 527 if (rc || list_empty(&sclp_mem_list)) 528 goto out; 529 for (i = 1; i <= sclp.rnmax - assigned; i++) 530 insert_increment(0, 1, 0); 531 rc = sclp_init_mem(); 532 out: 533 free_page((unsigned long)sccb); 534 return rc; 535 } 536 __initcall(sclp_setup_memory); 537