// SPDX-License-Identifier: GPL-2.0
/*
 * Memory hotplug support via sclp
 *
 * Copyright IBM Corp. 2025
 */

#define pr_fmt(fmt) "sclp_mem: " fmt

#include <linux/cpufeature.h>
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/slab.h>
#include <asm/facility.h>
#include <asm/page.h>
#include <asm/page-states.h>
#include <asm/sclp.h>

#include "sclp.h"

#define SCLP_CMDW_ASSIGN_STORAGE	0x000d0001
#define SCLP_CMDW_UNASSIGN_STORAGE	0x000c0001

static LIST_HEAD(sclp_mem_list);
static u8 sclp_max_storage_id;
static DECLARE_BITMAP(sclp_storage_ids, 256);

struct memory_increment {
	struct list_head list;
	u16 rn;
	int standby;
};

struct sclp_mem {
	struct kobject kobj;
	unsigned int id;
	unsigned int memmap_on_memory;
	unsigned int config;
#ifdef CONFIG_KASAN
	unsigned int early_shadow_mapped;
#endif
};

struct sclp_mem_arg {
	struct sclp_mem *sclp_mems;
	struct kset *kset;
};

struct assign_storage_sccb {
	struct sccb_header header;
	u16 rn;
} __packed;

struct attach_storage_sccb {
	struct sccb_header header;
	u16 :16;
	u16 assigned;
	u32 :32;
	u32 entries[];
} __packed;

int arch_get_memory_phys_device(unsigned long start_pfn)
{
	if (!sclp.rzm)
		return 0;
	return PFN_PHYS(start_pfn) >> ilog2(sclp.rzm);
}

static unsigned long rn2addr(u16 rn)
{
	return (unsigned long)(rn - 1) * sclp.rzm;
}

static int do_assign_storage(sclp_cmdw_t cmd, u16 rn)
{
	struct assign_storage_sccb *sccb;
	int rc;

	sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!sccb)
		return -ENOMEM;
	sccb->header.length = PAGE_SIZE;
	sccb->rn = rn;
	rc = sclp_sync_request_timeout(cmd, sccb, SCLP_QUEUE_INTERVAL);
	if (rc)
		goto out;
	switch (sccb->header.response_code) {
	case 0x0020:
	case 0x0120:
		break;
	default:
		pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n",
			cmd, sccb->header.response_code, rn);
		rc = -EIO;
		break;
	}
out:
	free_page((unsigned long)sccb);
	return rc;
}

static int sclp_assign_storage(u16 rn)
{
	unsigned long start;
	int rc;

	rc = do_assign_storage(SCLP_CMDW_ASSIGN_STORAGE, rn);
	if (rc)
		return rc;
	start = rn2addr(rn);
	storage_key_init_range(start, start + sclp.rzm);
	return 0;
}

static int sclp_unassign_storage(u16 rn)
{
	return do_assign_storage(SCLP_CMDW_UNASSIGN_STORAGE, rn);
}

static int sclp_attach_storage(u8 id)
{
	struct attach_storage_sccb *sccb;
	int rc, i;

	sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!sccb)
		return -ENOMEM;
	sccb->header.length = PAGE_SIZE;
	sccb->header.function_code = 0x40;
	rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb,
				       SCLP_QUEUE_INTERVAL);
	if (rc)
		goto out;
	switch (sccb->header.response_code) {
	case 0x0020:
		set_bit(id, sclp_storage_ids);
		for (i = 0; i < sccb->assigned; i++) {
			if (sccb->entries[i])
				sclp_unassign_storage(sccb->entries[i] >> 16);
		}
		break;
	default:
		rc = -EIO;
		break;
	}
out:
	free_page((unsigned long)sccb);
	return rc;
}

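/*
 * Assign or unassign all storage increments that intersect the range
 * [start, start + size). An increment covers sclp.rzm bytes and increment
 * numbers are 1-based (see rn2addr()); the list is sorted by increment
 * number, so the walk can stop at the first increment that starts beyond
 * the range.
 */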
static int sclp_mem_change_state(unsigned long start, unsigned long size,
				 int online)
{
	struct memory_increment *incr;
	unsigned long istart;
	int rc = 0;

	list_for_each_entry(incr, &sclp_mem_list, list) {
		istart = rn2addr(incr->rn);
		if (start + size - 1 < istart)
			break;
		if (start > istart + sclp.rzm - 1)
			continue;
		if (online)
			rc |= sclp_assign_storage(incr->rn);
		else
			sclp_unassign_storage(incr->rn);
		if (rc == 0)
			incr->standby = online ? 0 : 1;
	}
	return rc ? -EIO : 0;
}

static ssize_t sclp_config_mem_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

	return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->config));
}

static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	unsigned long addr, block_size;
	struct sclp_mem *sclp_mem;
	struct memory_block *mem;
	unsigned long id;
	bool value;
	int rc;

	rc = kstrtobool(buf, &value);
	if (rc)
		return rc;
	sclp_mem = container_of(kobj, struct sclp_mem, kobj);
	block_size = memory_block_size_bytes();
	addr = sclp_mem->id * block_size;
	/*
	 * Hold device_hotplug_lock when adding/removing memory blocks.
	 * Additionally, also protect calls to find_memory_block() and
	 * sclp_attach_storage().
	 */
	rc = lock_device_hotplug_sysfs();
	if (rc)
		goto out;
	for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
		sclp_attach_storage(id);
	if (value) {
		if (sclp_mem->config)
			goto out_unlock;
		rc = sclp_mem_change_state(addr, block_size, 1);
		if (rc)
			goto out_unlock;
		/*
		 * Set the CMMA state of the entire memory block to nodat.
		 * Later, when page table pages are allocated for the block
		 * via __add_memory(), those regions are marked dat with
		 * __arch_set_page_dat().
		 */
		__arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
		rc = __add_memory(0, addr, block_size,
				  sclp_mem->memmap_on_memory ?
				  MHP_MEMMAP_ON_MEMORY : MHP_NONE);
		if (rc) {
			sclp_mem_change_state(addr, block_size, 0);
			goto out_unlock;
		}
		mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
		put_device(&mem->dev);
		WRITE_ONCE(sclp_mem->config, 1);
	} else {
		if (!sclp_mem->config)
			goto out_unlock;
		mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
		if (mem->state != MEM_OFFLINE) {
			put_device(&mem->dev);
			rc = -EBUSY;
			goto out_unlock;
		}
		/* Drop the reference just obtained via find_memory_block(). */
		put_device(&mem->dev);
		sclp_mem_change_state(addr, block_size, 0);
		__remove_memory(addr, block_size);
#ifdef CONFIG_KASAN
		if (sclp_mem->early_shadow_mapped) {
			unsigned long start, end;

			start = (unsigned long)kasan_mem_to_shadow(__va(addr));
			end = start + (block_size >> KASAN_SHADOW_SCALE_SHIFT);
			vmemmap_free(start, end, NULL);
			sclp_mem->early_shadow_mapped = 0;
		}
#endif
		WRITE_ONCE(sclp_mem->config, 0);
	}
out_unlock:
	unlock_device_hotplug();
out:
	return rc ? rc : count;
}

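/*
 * Writing 1 to the "config" attribute assigns the backing storage
 * increments and adds the memory block; writing 0 requires the block to
 * be offline, then removes it again and unassigns its increments.
 */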
static struct kobj_attribute sclp_config_mem_attr =
	__ATTR(config, 0644, sclp_config_mem_show, sclp_config_mem_store);

static ssize_t sclp_memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

	return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->memmap_on_memory));
}

static ssize_t sclp_memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	struct sclp_mem *sclp_mem;
	unsigned long block_size;
	struct memory_block *mem;
	bool value;
	int rc;

	rc = kstrtobool(buf, &value);
	if (rc)
		return rc;
	if (value && !mhp_supports_memmap_on_memory())
		return -EOPNOTSUPP;
	rc = lock_device_hotplug_sysfs();
	if (rc)
		return rc;
	block_size = memory_block_size_bytes();
	sclp_mem = container_of(kobj, struct sclp_mem, kobj);
	mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(sclp_mem->id * block_size)));
	if (!mem) {
		WRITE_ONCE(sclp_mem->memmap_on_memory, value);
	} else {
		put_device(&mem->dev);
		rc = -EBUSY;
	}
	unlock_device_hotplug();
	return rc ? rc : count;
}

static const struct kobj_type ktype = {
	.sysfs_ops = &kobj_sysfs_ops,
};

static struct kobj_attribute sclp_memmap_attr =
	__ATTR(memmap_on_memory, 0644, sclp_memmap_on_memory_show, sclp_memmap_on_memory_store);

static struct attribute *sclp_mem_attrs[] = {
	&sclp_config_mem_attr.attr,
	&sclp_memmap_attr.attr,
	NULL,
};

static const struct attribute_group sclp_mem_attr_group = {
	.attrs = sclp_mem_attrs,
};

static int sclp_create_mem(struct sclp_mem *sclp_mem, struct kset *kset,
			   unsigned int id, bool config, bool memmap_on_memory)
{
	int rc;

	sclp_mem->memmap_on_memory = memmap_on_memory;
	sclp_mem->config = config;
#ifdef CONFIG_KASAN
	sclp_mem->early_shadow_mapped = config;
#endif
	sclp_mem->id = id;
	kobject_init(&sclp_mem->kobj, &ktype);
	rc = kobject_add(&sclp_mem->kobj, &kset->kobj, "memory%d", id);
	if (rc)
		return rc;
	return sysfs_create_group(&sclp_mem->kobj, &sclp_mem_attr_group);
}

static int sclp_create_configured_mem(struct memory_block *mem, void *argument)
{
	struct sclp_mem *sclp_mems;
	struct sclp_mem_arg *arg;
	struct kset *kset;
	unsigned int id;

	id = mem->dev.id;
	arg = (struct sclp_mem_arg *)argument;
	sclp_mems = arg->sclp_mems;
	kset = arg->kset;
	return sclp_create_mem(&sclp_mems[id], kset, id, true, false);
}

static void __init align_to_block_size(unsigned long *start,
				       unsigned long *size,
				       unsigned long alignment)
{
	unsigned long start_align, size_align;

	start_align = roundup(*start, alignment);
	size_align = rounddown(*start + *size, alignment) - start_align;

	pr_info("Standby memory at 0x%lx (%luM of %luM usable)\n",
		*start, size_align >> 20, *size >> 20);
	*start = start_align;
	*size = size_align;
}

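/*
 * Called once per standby increment in ascending order, plus a final call
 * with rn == 0 to flush the last range: contiguous increments are merged
 * into one range, which is then clipped to the identity-mapped memory
 * limit, aligned to the memory block size, and registered as one
 * deconfigured sclp_mem per memory block.
 */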
static int __init sclp_create_standby_mems_merged(struct sclp_mem *sclp_mems,
						  struct kset *kset, u16 rn)
{
	unsigned long start, size, addr, block_size;
	static u16 first_rn, num;
	unsigned int id;
	int rc = 0;

	if (rn && first_rn && (first_rn + num == rn)) {
		num++;
		return rc;
	}
	if (!first_rn)
		goto skip_add;
	start = rn2addr(first_rn);
	size = (unsigned long)num * sclp.rzm;
	if (start >= ident_map_size)
		goto skip_add;
	if (start + size > ident_map_size)
		size = ident_map_size - start;
	block_size = memory_block_size_bytes();
	align_to_block_size(&start, &size, block_size);
	if (!size)
		goto skip_add;
	for (addr = start; addr < start + size; addr += block_size) {
		id = addr / block_size;
		rc = sclp_create_mem(&sclp_mems[id], kset, id, false,
				     mhp_supports_memmap_on_memory());
		if (rc)
			break;
	}
skip_add:
	first_rn = rn;
	num = 1;
	return rc;
}

static int __init sclp_create_standby_mems(struct sclp_mem *sclp_mems, struct kset *kset)
{
	struct memory_increment *incr;
	int rc = 0;

	list_for_each_entry(incr, &sclp_mem_list, list) {
		if (incr->standby)
			rc = sclp_create_standby_mems_merged(sclp_mems, kset, incr->rn);
		if (rc)
			return rc;
	}
	return sclp_create_standby_mems_merged(sclp_mems, kset, 0);
}

static int __init sclp_init_mem(void)
{
	const unsigned long block_size = memory_block_size_bytes();
	unsigned int max_sclp_mems;
	struct sclp_mem *sclp_mems;
	struct sclp_mem_arg arg;
	struct kset *kset;
	int rc;

	max_sclp_mems = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
	/* Allocate memory for all blocks ahead of time. */
	sclp_mems = kcalloc(max_sclp_mems, sizeof(struct sclp_mem), GFP_KERNEL);
	if (!sclp_mems)
		return -ENOMEM;
	kset = kset_create_and_add("memory", NULL, firmware_kobj);
	if (!kset) {
		kfree(sclp_mems);
		return -ENOMEM;
	}
	/* Initial memory is in the "configured" state already. */
	arg.sclp_mems = sclp_mems;
	arg.kset = kset;
	rc = for_each_memory_block(&arg, sclp_create_configured_mem);
	if (rc)
		return rc;
	/* Standby memory is "deconfigured". */
	return sclp_create_standby_mems(sclp_mems, kset);
}

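/*
 * Insert an increment into the sorted increment list. Assigned increments
 * carry their real increment number; unassigned standby increments are
 * passed in with rn == 0 and receive the first unused increment number,
 * filling the gaps in the sequence.
 */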
static void __init insert_increment(u16 rn, int standby, int assigned)
{
	struct memory_increment *incr, *new_incr;
	struct list_head *prev;
	u16 last_rn;

	new_incr = kzalloc(sizeof(*new_incr), GFP_KERNEL);
	if (!new_incr)
		return;
	new_incr->rn = rn;
	new_incr->standby = standby;
	last_rn = 0;
	prev = &sclp_mem_list;
	list_for_each_entry(incr, &sclp_mem_list, list) {
		if (assigned && incr->rn > rn)
			break;
		if (!assigned && incr->rn - last_rn > 1)
			break;
		last_rn = incr->rn;
		prev = &incr->list;
	}
	if (!assigned)
		new_incr->rn = last_rn + 1;
	if (new_incr->rn > sclp.rnmax) {
		kfree(new_incr);
		return;
	}
	list_add(&new_incr->list, prev);
}

static int __init sclp_setup_memory(void)
{
	struct read_storage_sccb *sccb;
	int i, id, assigned, rc;

	/* No standby memory in kdump mode */
	if (oldmem_data.start)
		return 0;
	if ((sclp.facilities & 0xe00000000000UL) != 0xe00000000000UL)
		return 0;
	rc = -ENOMEM;
	sccb = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
	if (!sccb)
		goto out;
	assigned = 0;
	for (id = 0; id <= sclp_max_storage_id; id++) {
		memset(sccb, 0, PAGE_SIZE);
		sccb->header.length = PAGE_SIZE;
		rc = sclp_sync_request(SCLP_CMDW_READ_STORAGE_INFO | id << 8, sccb);
		if (rc)
			goto out;
		switch (sccb->header.response_code) {
		case 0x0010:
			/* Increments in this storage element are assigned. */
			set_bit(id, sclp_storage_ids);
			for (i = 0; i < sccb->assigned; i++) {
				if (!sccb->entries[i])
					continue;
				assigned++;
				insert_increment(sccb->entries[i] >> 16, 0, 1);
			}
			break;
		case 0x0310:
			break;
		case 0x0410:
			/* Increments in this storage element are standby. */
			for (i = 0; i < sccb->assigned; i++) {
				if (!sccb->entries[i])
					continue;
				assigned++;
				insert_increment(sccb->entries[i] >> 16, 1, 1);
			}
			break;
		default:
			rc = -EIO;
			break;
		}
		if (!rc)
			sclp_max_storage_id = sccb->max_id;
	}
	if (rc || list_empty(&sclp_mem_list))
		goto out;
	for (i = 1; i <= sclp.rnmax - assigned; i++)
		insert_increment(0, 1, 0);
	rc = sclp_init_mem();
out:
	free_page((unsigned long)sccb);
	return rc;
}
__initcall(sclp_setup_memory);