// SPDX-License-Identifier: GPL-2.0
/*
 * Memory hotplug support via sclp
 *
 * Copyright IBM Corp. 2025
 */

#define pr_fmt(fmt) "sclp_mem: " fmt

#include <linux/cpufeature.h>
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/slab.h>
#include <asm/facility.h>
#include <asm/page.h>
#include <asm/page-states.h>
#include <asm/sclp.h>

#include "sclp.h"

#define SCLP_CMDW_ASSIGN_STORAGE	0x000d0001
#define SCLP_CMDW_UNASSIGN_STORAGE	0x000c0001

static LIST_HEAD(sclp_mem_list);
static u8 sclp_max_storage_id;
static DECLARE_BITMAP(sclp_storage_ids, 256);

struct memory_increment {
	struct list_head list;
	u16 rn;
	int standby;
};

struct sclp_mem {
	struct kobject kobj;
	unsigned int id;
	unsigned int memmap_on_memory;
	unsigned int config;
};

struct sclp_mem_arg {
	struct sclp_mem *sclp_mems;
	struct kset *kset;
};

struct assign_storage_sccb {
	struct sccb_header header;
	u16 rn;
} __packed;

struct attach_storage_sccb {
	struct sccb_header header;
	u16 :16;
	u16 assigned;
	u32 :32;
	u32 entries[];
} __packed;

int arch_get_memory_phys_device(unsigned long start_pfn)
{
	if (!sclp.rzm)
		return 0;
	return PFN_PHYS(start_pfn) >> ilog2(sclp.rzm);
}

static unsigned long rn2addr(u16 rn)
{
	return (unsigned long)(rn - 1) * sclp.rzm;
}

static int do_assign_storage(sclp_cmdw_t cmd, u16 rn)
{
	struct assign_storage_sccb *sccb;
	int rc;

	sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!sccb)
		return -ENOMEM;
	sccb->header.length = PAGE_SIZE;
	sccb->rn = rn;
	rc = sclp_sync_request_timeout(cmd, sccb, SCLP_QUEUE_INTERVAL);
	if (rc)
		goto out;
	switch (sccb->header.response_code) {
	case 0x0020:
	case 0x0120:
		break;
	default:
		pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n",
			cmd, sccb->header.response_code, rn);
		rc = -EIO;
		break;
	}
out:
	free_page((unsigned long)sccb);
	return rc;
}

static int sclp_assign_storage(u16 rn)
{
	unsigned long start;
	int rc;

	rc = do_assign_storage(SCLP_CMDW_ASSIGN_STORAGE, rn);
	if (rc)
		return rc;
	start = rn2addr(rn);
	storage_key_init_range(start, start + sclp.rzm);
	return 0;
}

static int sclp_unassign_storage(u16 rn)
{
	return do_assign_storage(SCLP_CMDW_UNASSIGN_STORAGE, rn);
}

static int sclp_attach_storage(u8 id)
{
	struct attach_storage_sccb *sccb;
	int rc, i;

	sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!sccb)
		return -ENOMEM;
	sccb->header.length = PAGE_SIZE;
	sccb->header.function_code = 0x40;
	rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb,
				       SCLP_QUEUE_INTERVAL);
	if (rc)
		goto out;
	switch (sccb->header.response_code) {
	case 0x0020:
		set_bit(id, sclp_storage_ids);
		for (i = 0; i < sccb->assigned; i++) {
			if (sccb->entries[i])
				sclp_unassign_storage(sccb->entries[i] >> 16);
		}
		break;
	default:
		rc = -EIO;
		break;
	}
out:
	free_page((unsigned long)sccb);
	return rc;
}
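
/*
 * Assign or unassign all storage increments that overlap the range
 * [start, start + size). The increment list is sorted by increment
 * number, so the walk can stop at the first increment past the range.
 */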
static int sclp_mem_change_state(unsigned long start, unsigned long size,
				 int online)
{
	struct memory_increment *incr;
	unsigned long istart;
	int rc = 0;

	list_for_each_entry(incr, &sclp_mem_list, list) {
		istart = rn2addr(incr->rn);
		if (start + size - 1 < istart)
			break;
		if (start > istart + sclp.rzm - 1)
			continue;
		if (online)
			rc |= sclp_assign_storage(incr->rn);
		else
			sclp_unassign_storage(incr->rn);
		if (rc == 0)
			incr->standby = online ? 0 : 1;
	}
	return rc ? -EIO : 0;
}

static ssize_t sclp_config_mem_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

	return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->config));
}

static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	unsigned long addr, block_size;
	struct sclp_mem *sclp_mem;
	struct memory_block *mem;
	unsigned char id;
	bool value;
	int rc;

	rc = kstrtobool(buf, &value);
	if (rc)
		return rc;
	sclp_mem = container_of(kobj, struct sclp_mem, kobj);
	block_size = memory_block_size_bytes();
	addr = sclp_mem->id * block_size;
	/*
	 * Hold device_hotplug_lock when adding/removing memory blocks.
	 * Additionally, it protects the calls to find_memory_block() and
	 * sclp_attach_storage().
	 */
	rc = lock_device_hotplug_sysfs();
	if (rc)
		goto out;
	for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
		sclp_attach_storage(id);
	if (value) {
		if (sclp_mem->config)
			goto out_unlock;
		rc = sclp_mem_change_state(addr, block_size, 1);
		if (rc)
			goto out_unlock;
		/*
		 * Set the CMMA state of the entire memory block to nodat.
		 * Page table pages allocated later via __add_memory() are
		 * marked dat again via __arch_set_page_dat().
		 */
		__arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
		rc = __add_memory(0, addr, block_size,
				  sclp_mem->memmap_on_memory ?
				  MHP_MEMMAP_ON_MEMORY : MHP_NONE);
		if (rc) {
			sclp_mem_change_state(addr, block_size, 0);
			goto out_unlock;
		}
		mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
		put_device(&mem->dev);
		WRITE_ONCE(sclp_mem->config, 1);
	} else {
		if (!sclp_mem->config)
			goto out_unlock;
		mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
		if (mem->state != MEM_OFFLINE) {
			put_device(&mem->dev);
			rc = -EBUSY;
			goto out_unlock;
		}
		/* Drop the reference obtained via find_memory_block(). */
		put_device(&mem->dev);
		sclp_mem_change_state(addr, block_size, 0);
		__remove_memory(addr, block_size);
		WRITE_ONCE(sclp_mem->config, 0);
	}
out_unlock:
	unlock_device_hotplug();
out:
	return rc ? rc : count;
}

static struct kobj_attribute sclp_config_mem_attr =
	__ATTR(config, 0644, sclp_config_mem_show, sclp_config_mem_store);

static ssize_t sclp_memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

	return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->memmap_on_memory));
}

static ssize_t sclp_memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	struct sclp_mem *sclp_mem;
	unsigned long block_size;
	struct memory_block *mem;
	bool value;
	int rc;

	rc = kstrtobool(buf, &value);
	if (rc)
		return rc;
	if (value && !mhp_supports_memmap_on_memory())
		return -EOPNOTSUPP;
	rc = lock_device_hotplug_sysfs();
	if (rc)
		return rc;
	block_size = memory_block_size_bytes();
	sclp_mem = container_of(kobj, struct sclp_mem, kobj);
	mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(sclp_mem->id * block_size)));
	if (!mem) {
		WRITE_ONCE(sclp_mem->memmap_on_memory, value);
	} else {
		put_device(&mem->dev);
		rc = -EBUSY;
	}
	unlock_device_hotplug();
	return rc ? rc : count;
}

static const struct kobj_type ktype = {
	.sysfs_ops = &kobj_sysfs_ops,
};

static struct kobj_attribute sclp_memmap_attr =
	__ATTR(memmap_on_memory, 0644, sclp_memmap_on_memory_show, sclp_memmap_on_memory_store);

static struct attribute *sclp_mem_attrs[] = {
	&sclp_config_mem_attr.attr,
	&sclp_memmap_attr.attr,
	NULL,
};

static struct attribute_group sclp_mem_attr_group = {
	.attrs = sclp_mem_attrs,
};

static int sclp_create_mem(struct sclp_mem *sclp_mem, struct kset *kset,
			   unsigned int id, bool config, bool memmap_on_memory)
{
	int rc;

	sclp_mem->memmap_on_memory = memmap_on_memory;
	sclp_mem->config = config;
	sclp_mem->id = id;
	kobject_init(&sclp_mem->kobj, &ktype);
	rc = kobject_add(&sclp_mem->kobj, &kset->kobj, "memory%d", id);
	if (rc)
		return rc;
	return sysfs_create_group(&sclp_mem->kobj, &sclp_mem_attr_group);
}

static int sclp_create_configured_mem(struct memory_block *mem, void *argument)
{
	struct sclp_mem *sclp_mems;
	struct sclp_mem_arg *arg;
	struct kset *kset;
	unsigned int id;

	id = mem->dev.id;
	arg = (struct sclp_mem_arg *)argument;
	sclp_mems = arg->sclp_mems;
	kset = arg->kset;
	return sclp_create_mem(&sclp_mems[id], kset, id, true, false);
}

static void __init align_to_block_size(unsigned long *start,
				       unsigned long *size,
				       unsigned long alignment)
{
	unsigned long start_align, size_align;

	start_align = roundup(*start, alignment);
	size_align = rounddown(*start + *size, alignment) - start_align;

	pr_info("Standby memory at 0x%lx (%luM of %luM usable)\n",
		*start, size_align >> 20, *size >> 20);
	*start = start_align;
	*size = size_align;
}
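
/*
 * Create sysfs entries for one contiguous run of standby increments.
 * Consecutive increment numbers are accumulated; a gap (or rn == 0 at
 * the end of the scan) flushes the previously accumulated run.
 */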
static int __init sclp_create_standby_mems_merged(struct sclp_mem *sclp_mems,
						  struct kset *kset, u16 rn)
{
	unsigned long start, size, addr, block_size;
	static u16 first_rn, num;
	unsigned int id;
	int rc = 0;

	if (rn && first_rn && (first_rn + num == rn)) {
		num++;
		return rc;
	}
	if (!first_rn)
		goto skip_add;
	start = rn2addr(first_rn);
	size = (unsigned long)num * sclp.rzm;
	if (start >= ident_map_size)
		goto skip_add;
	if (start + size > ident_map_size)
		size = ident_map_size - start;
	block_size = memory_block_size_bytes();
	align_to_block_size(&start, &size, block_size);
	if (!size)
		goto skip_add;
	for (addr = start; addr < start + size; addr += block_size) {
		id = addr / block_size;
		rc = sclp_create_mem(&sclp_mems[id], kset, id, false,
				     mhp_supports_memmap_on_memory());
		if (rc)
			break;
	}
skip_add:
	first_rn = rn;
	num = 1;
	return rc;
}

static int __init sclp_create_standby_mems(struct sclp_mem *sclp_mems, struct kset *kset)
{
	struct memory_increment *incr;
	int rc = 0;

	list_for_each_entry(incr, &sclp_mem_list, list) {
		if (incr->standby)
			rc = sclp_create_standby_mems_merged(sclp_mems, kset, incr->rn);
		if (rc)
			return rc;
	}
	return sclp_create_standby_mems_merged(sclp_mems, kset, 0);
}

static int __init sclp_init_mem(void)
{
	const unsigned long block_size = memory_block_size_bytes();
	unsigned int max_sclp_mems;
	struct sclp_mem *sclp_mems;
	struct sclp_mem_arg arg;
	struct kset *kset;
	int rc;

	max_sclp_mems = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
	/* Allocate memory for all blocks ahead of time. */
	sclp_mems = kcalloc(max_sclp_mems, sizeof(struct sclp_mem), GFP_KERNEL);
	if (!sclp_mems)
		return -ENOMEM;
	kset = kset_create_and_add("memory", NULL, firmware_kobj);
	if (!kset)
		return -ENOMEM;
	/* Initial memory is in the "configured" state already. */
	arg.sclp_mems = sclp_mems;
	arg.kset = kset;
	rc = for_each_memory_block(&arg, sclp_create_configured_mem);
	if (rc)
		return rc;
	/* Standby memory is "deconfigured". */
	return sclp_create_standby_mems(sclp_mems, kset);
}

static void __init insert_increment(u16 rn, int standby, int assigned)
{
	struct memory_increment *incr, *new_incr;
	struct list_head *prev;
	u16 last_rn;

	new_incr = kzalloc(sizeof(*new_incr), GFP_KERNEL);
	if (!new_incr)
		return;
	new_incr->rn = rn;
	new_incr->standby = standby;
	last_rn = 0;
	prev = &sclp_mem_list;
	list_for_each_entry(incr, &sclp_mem_list, list) {
		if (assigned && incr->rn > rn)
			break;
		if (!assigned && incr->rn - last_rn > 1)
			break;
		last_rn = incr->rn;
		prev = &incr->list;
	}
	if (!assigned)
		new_incr->rn = last_rn + 1;
	if (new_incr->rn > sclp.rnmax) {
		kfree(new_incr);
		return;
	}
	list_add(&new_incr->list, prev);
}
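
/*
 * Read storage information from the SCLP, build the list of memory
 * increments and create the sysfs representation for configured and
 * standby memory.
 */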
static int __init sclp_setup_memory(void)
{
	struct read_storage_sccb *sccb;
	int i, id, assigned, rc;

	/* No standby memory in kdump mode */
	if (oldmem_data.start)
		return 0;
	if ((sclp.facilities & 0xe00000000000UL) != 0xe00000000000UL)
		return 0;
	rc = -ENOMEM;
	sccb = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
	if (!sccb)
		goto out;
	assigned = 0;
	for (id = 0; id <= sclp_max_storage_id; id++) {
		memset(sccb, 0, PAGE_SIZE);
		sccb->header.length = PAGE_SIZE;
		rc = sclp_sync_request(SCLP_CMDW_READ_STORAGE_INFO | id << 8, sccb);
		if (rc)
			goto out;
		switch (sccb->header.response_code) {
		case 0x0010:
			set_bit(id, sclp_storage_ids);
			for (i = 0; i < sccb->assigned; i++) {
				if (!sccb->entries[i])
					continue;
				assigned++;
				insert_increment(sccb->entries[i] >> 16, 0, 1);
			}
			break;
		case 0x0310:
			break;
		case 0x0410:
			for (i = 0; i < sccb->assigned; i++) {
				if (!sccb->entries[i])
					continue;
				assigned++;
				insert_increment(sccb->entries[i] >> 16, 1, 1);
			}
			break;
		default:
			rc = -EIO;
			break;
		}
		if (!rc)
			sclp_max_storage_id = sccb->max_id;
	}
	if (rc || list_empty(&sclp_mem_list))
		goto out;
	for (i = 1; i <= sclp.rnmax - assigned; i++)
		insert_increment(0, 1, 0);
	rc = sclp_init_mem();
out:
	free_page((unsigned long)sccb);
	return rc;
}
__initcall(sclp_setup_memory);