// SPDX-License-Identifier: GPL-2.0
/*
 * Memory hotplug support via sclp
 *
 * Copyright IBM Corp. 2025
 */

#define pr_fmt(fmt) "sclp_mem: " fmt

#include <linux/cpufeature.h>
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/slab.h>
#include <asm/facility.h>
#include <asm/page.h>
#include <asm/page-states.h>
#include <asm/sclp.h>

#include "sclp.h"

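/* SCLP commands for assigning and unassigning storage increments */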
#define SCLP_CMDW_ASSIGN_STORAGE	0x000d0001
#define SCLP_CMDW_UNASSIGN_STORAGE	0x000c0001

static LIST_HEAD(sclp_mem_list);
static u8 sclp_max_storage_id;
static DECLARE_BITMAP(sclp_storage_ids, 256);

struct memory_increment {
        struct list_head list;
        u16 rn;
        int standby;
};

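/*
 * Per memory block state, exposed via a kobject below
 * /sys/firmware/memory/memory<id>.
 */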
struct sclp_mem {
        struct kobject kobj;
        unsigned int id;
        unsigned int memmap_on_memory;
        unsigned int config;
#ifdef CONFIG_KASAN
        unsigned int early_shadow_mapped;
#endif
};

struct sclp_mem_arg {
        struct sclp_mem *sclp_mems;
        struct kset *kset;
};

struct assign_storage_sccb {
        struct sccb_header header;
        u16 rn;
} __packed;

struct attach_storage_sccb {
        struct sccb_header header;
        u16 :16;
        u16 assigned;
        u32 :32;
        u32 entries[];
} __packed;

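/*
 * Return the storage increment number a memory block starts in; used for
 * the phys_device sysfs attribute of memory blocks.
 */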
int arch_get_memory_phys_device(unsigned long start_pfn)
{
        if (!sclp.rzm)
                return 0;
        return PFN_PHYS(start_pfn) >> ilog2(sclp.rzm);
}

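/* Convert a 1-based storage increment number to its start address. */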
static unsigned long rn2addr(u16 rn)
{
        return (unsigned long)(rn - 1) * sclp.rzm;
}

static int do_assign_storage(sclp_cmdw_t cmd, u16 rn)
{
        struct assign_storage_sccb *sccb;
        int rc;

        sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!sccb)
                return -ENOMEM;
        sccb->header.length = PAGE_SIZE;
        sccb->rn = rn;
        rc = sclp_sync_request_timeout(cmd, sccb, SCLP_QUEUE_INTERVAL);
        if (rc)
                goto out;
        switch (sccb->header.response_code) {
        case 0x0020:
        case 0x0120:
                break;
        default:
                pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n",
                        cmd, sccb->header.response_code, rn);
                rc = -EIO;
                break;
        }
out:
        free_page((unsigned long)sccb);
        return rc;
}

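/*
 * Assign a storage increment to the configuration and initialize the
 * storage keys of the newly usable range.
 */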
static int sclp_assign_storage(u16 rn)
{
        unsigned long start;
        int rc;

        rc = do_assign_storage(SCLP_CMDW_ASSIGN_STORAGE, rn);
        if (rc)
                return rc;
        start = rn2addr(rn);
        storage_key_init_range(start, start + sclp.rzm);
        return 0;
}

static int sclp_unassign_storage(u16 rn)
{
        return do_assign_storage(SCLP_CMDW_UNASSIGN_STORAGE, rn);
}

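/*
 * Attach the storage element with the given id and unassign all increments
 * that the response reports as assigned, so they end up in standby state.
 */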
static int sclp_attach_storage(u8 id)
{
        struct attach_storage_sccb *sccb;
        int rc, i;

        sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!sccb)
                return -ENOMEM;
        sccb->header.length = PAGE_SIZE;
        sccb->header.function_code = 0x40;
        rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb,
                                       SCLP_QUEUE_INTERVAL);
        if (rc)
                goto out;
        switch (sccb->header.response_code) {
        case 0x0020:
                set_bit(id, sclp_storage_ids);
                for (i = 0; i < sccb->assigned; i++) {
                        if (sccb->entries[i])
                                sclp_unassign_storage(sccb->entries[i] >> 16);
                }
                break;
        default:
                rc = -EIO;
                break;
        }
out:
        free_page((unsigned long)sccb);
        return rc;
}

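/*
 * Assign or unassign all storage increments that overlap the given
 * address range.
 */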
static int sclp_mem_change_state(unsigned long start, unsigned long size,
                                 int online)
{
        struct memory_increment *incr;
        unsigned long istart;
        int rc = 0;

        list_for_each_entry(incr, &sclp_mem_list, list) {
                istart = rn2addr(incr->rn);
                if (start + size - 1 < istart)
                        break;
                if (start > istart + sclp.rzm - 1)
                        continue;
                if (online)
                        rc |= sclp_assign_storage(incr->rn);
                else
                        sclp_unassign_storage(incr->rn);
                if (rc == 0)
                        incr->standby = online ? 0 : 1;
        }
        return rc ? -EIO : 0;
}

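/*
 * The "config" attribute shows whether a memory block is currently
 * configured. Writing 1 assigns the underlying storage increments and adds
 * the memory; writing 0 removes the memory and unassigns the increments.
 */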
static ssize_t sclp_config_mem_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
        struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

        return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->config));
}

static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute *attr,
                                     const char *buf, size_t count)
{
        unsigned long addr, block_size;
        struct sclp_mem *sclp_mem;
        struct memory_block *mem;
        unsigned char id;
        bool value;
        int rc;

        rc = kstrtobool(buf, &value);
        if (rc)
                return rc;
        sclp_mem = container_of(kobj, struct sclp_mem, kobj);
        block_size = memory_block_size_bytes();
        addr = sclp_mem->id * block_size;
        /*
         * Hold device_hotplug_lock when adding/removing memory blocks.
         * It also protects the calls to find_memory_block() and
         * sclp_attach_storage().
         */
        rc = lock_device_hotplug_sysfs();
        if (rc)
                goto out;
        for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
                sclp_attach_storage(id);
        if (value) {
                if (sclp_mem->config)
                        goto out_unlock;
                rc = sclp_mem_change_state(addr, block_size, 1);
                if (rc)
                        goto out_unlock;
                /*
                 * Set the CMMA state of the entire memory block to nodat.
                 * Later, when page table pages are allocated via
                 * __add_memory(), those regions are marked with
                 * __arch_set_page_dat().
                 */
                __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
                rc = __add_memory(0, addr, block_size,
                                  sclp_mem->memmap_on_memory ?
                                  MHP_MEMMAP_ON_MEMORY : MHP_NONE);
                if (rc) {
                        sclp_mem_change_state(addr, block_size, 0);
                        goto out_unlock;
                }
                mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
                put_device(&mem->dev);
                WRITE_ONCE(sclp_mem->config, 1);
        } else {
                if (!sclp_mem->config)
                        goto out_unlock;
                mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
                if (mem->state != MEM_OFFLINE) {
                        put_device(&mem->dev);
                        rc = -EBUSY;
                        goto out_unlock;
                }
                /* Drop the reference just taken via find_memory_block(). */
                put_device(&mem->dev);
                sclp_mem_change_state(addr, block_size, 0);
                __remove_memory(addr, block_size);
#ifdef CONFIG_KASAN
                if (sclp_mem->early_shadow_mapped) {
                        unsigned long start, end;

                        start = (unsigned long)kasan_mem_to_shadow(__va(addr));
                        end = start + (block_size >> KASAN_SHADOW_SCALE_SHIFT);
                        vmemmap_free(start, end, NULL);
                        sclp_mem->early_shadow_mapped = 0;
                }
#endif
                WRITE_ONCE(sclp_mem->config, 0);
        }
out_unlock:
        unlock_device_hotplug();
out:
        return rc ? rc : count;
}

static struct kobj_attribute sclp_config_mem_attr =
        __ATTR(config, 0644, sclp_config_mem_show, sclp_config_mem_store);

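/*
 * The "memmap_on_memory" attribute controls whether the memmap (struct pages)
 * for this block is allocated from the hotplugged memory itself when the
 * block is configured. It can only be changed while the block is not added.
 */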
static ssize_t sclp_memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr,
                                          char *buf)
{
        struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

        return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->memmap_on_memory));
}

static ssize_t sclp_memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr,
                                           const char *buf, size_t count)
{
        struct sclp_mem *sclp_mem;
        unsigned long block_size;
        struct memory_block *mem;
        bool value;
        int rc;

        rc = kstrtobool(buf, &value);
        if (rc)
                return rc;
        if (value && !mhp_supports_memmap_on_memory())
                return -EOPNOTSUPP;
        rc = lock_device_hotplug_sysfs();
        if (rc)
                return rc;
        block_size = memory_block_size_bytes();
        sclp_mem = container_of(kobj, struct sclp_mem, kobj);
        mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(sclp_mem->id * block_size)));
        if (!mem) {
                WRITE_ONCE(sclp_mem->memmap_on_memory, value);
        } else {
                put_device(&mem->dev);
                rc = -EBUSY;
        }
        unlock_device_hotplug();
        return rc ? rc : count;
}

static const struct kobj_type ktype = {
        .sysfs_ops = &kobj_sysfs_ops,
};

static struct kobj_attribute sclp_memmap_attr =
        __ATTR(memmap_on_memory, 0644, sclp_memmap_on_memory_show, sclp_memmap_on_memory_store);

static struct attribute *sclp_mem_attrs[] = {
        &sclp_config_mem_attr.attr,
        &sclp_memmap_attr.attr,
        NULL,
};

static struct attribute_group sclp_mem_attr_group = {
        .attrs = sclp_mem_attrs,
};

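/*
 * Initialize one sclp_mem object and create its sysfs directory and
 * attribute group below the given kset.
 */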
static int sclp_create_mem(struct sclp_mem *sclp_mem, struct kset *kset,
                           unsigned int id, bool config, bool memmap_on_memory)
{
        int rc;

        sclp_mem->memmap_on_memory = memmap_on_memory;
        sclp_mem->config = config;
#ifdef CONFIG_KASAN
        sclp_mem->early_shadow_mapped = config;
#endif
        sclp_mem->id = id;
        kobject_init(&sclp_mem->kobj, &ktype);
        rc = kobject_add(&sclp_mem->kobj, &kset->kobj, "memory%d", id);
        if (rc)
                return rc;
        return sysfs_create_group(&sclp_mem->kobj, &sclp_mem_attr_group);
}

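/*
 * for_each_memory_block() callback: create a sysfs entry for a memory
 * block that is already configured.
 */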
static int sclp_create_configured_mem(struct memory_block *mem, void *argument)
{
        struct sclp_mem *sclp_mems;
        struct sclp_mem_arg *arg;
        struct kset *kset;
        unsigned int id;

        id = mem->dev.id;
        arg = (struct sclp_mem_arg *)argument;
        sclp_mems = arg->sclp_mems;
        kset = arg->kset;
        return sclp_create_mem(&sclp_mems[id], kset, id, true, false);
}

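/*
 * Align a standby memory range to the memory block size and report how
 * much of it remains usable.
 */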
static void __init align_to_block_size(unsigned long *start,
                                       unsigned long *size,
                                       unsigned long alignment)
{
        unsigned long start_align, size_align;

        start_align = roundup(*start, alignment);
        size_align = rounddown(*start + *size, alignment) - start_align;

        pr_info("Standby memory at 0x%lx (%luM of %luM usable)\n",
                *start, size_align >> 20, *size >> 20);
        *start = start_align;
        *size = size_align;
}

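/*
 * Collect contiguous standby increments into one range and create sysfs
 * entries for all memory blocks within it. A call with rn == 0 flushes the
 * last pending range.
 */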
static int __init sclp_create_standby_mems_merged(struct sclp_mem *sclp_mems,
                                                  struct kset *kset, u16 rn)
{
        unsigned long start, size, addr, block_size;
        static u16 first_rn, num;
        unsigned int id;
        int rc = 0;

        if (rn && first_rn && (first_rn + num == rn)) {
                num++;
                return rc;
        }
        if (!first_rn)
                goto skip_add;
        start = rn2addr(first_rn);
        size = (unsigned long)num * sclp.rzm;
        if (start >= ident_map_size)
                goto skip_add;
        if (start + size > ident_map_size)
                size = ident_map_size - start;
        block_size = memory_block_size_bytes();
        align_to_block_size(&start, &size, block_size);
        if (!size)
                goto skip_add;
        for (addr = start; addr < start + size; addr += block_size) {
                id = addr / block_size;
                rc = sclp_create_mem(&sclp_mems[id], kset, id, false,
                                     mhp_supports_memmap_on_memory());
                if (rc)
                        break;
        }
skip_add:
        first_rn = rn;
        num = 1;
        return rc;
}

static int __init sclp_create_standby_mems(struct sclp_mem *sclp_mems, struct kset *kset)
{
        struct memory_increment *incr;
        int rc = 0;

        list_for_each_entry(incr, &sclp_mem_list, list) {
                if (incr->standby)
                        rc = sclp_create_standby_mems_merged(sclp_mems, kset, incr->rn);
                if (rc)
                        return rc;
        }
        return sclp_create_standby_mems_merged(sclp_mems, kset, 0);
}

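/*
 * Allocate the sclp_mem array and the /sys/firmware/memory kset and
 * populate it with entries for configured and standby memory blocks.
 */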
static int __init sclp_init_mem(void)
{
        const unsigned long block_size = memory_block_size_bytes();
        unsigned int max_sclp_mems;
        struct sclp_mem *sclp_mems;
        struct sclp_mem_arg arg;
        struct kset *kset;
        int rc;

        max_sclp_mems = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
        /* Allocate memory for all blocks ahead of time. */
        sclp_mems = kcalloc(max_sclp_mems, sizeof(struct sclp_mem), GFP_KERNEL);
        if (!sclp_mems)
                return -ENOMEM;
        kset = kset_create_and_add("memory", NULL, firmware_kobj);
        if (!kset)
                return -ENOMEM;
        /* Initial memory is in the "configured" state already. */
        arg.sclp_mems = sclp_mems;
        arg.kset = kset;
        rc = for_each_memory_block(&arg, sclp_create_configured_mem);
        if (rc)
                return rc;
        /* Standby memory is "deconfigured". */
        return sclp_create_standby_mems(sclp_mems, kset);
}

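/*
 * Insert a storage increment into the sorted increment list. For increments
 * that are not assigned, the increment number is chosen as the first gap in
 * the list.
 */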
static void __init insert_increment(u16 rn, int standby, int assigned)
{
        struct memory_increment *incr, *new_incr;
        struct list_head *prev;
        u16 last_rn;

        new_incr = kzalloc(sizeof(*new_incr), GFP_KERNEL);
        if (!new_incr)
                return;
        new_incr->rn = rn;
        new_incr->standby = standby;
        last_rn = 0;
        prev = &sclp_mem_list;
        list_for_each_entry(incr, &sclp_mem_list, list) {
                if (assigned && incr->rn > rn)
                        break;
                if (!assigned && incr->rn - last_rn > 1)
                        break;
                last_rn = incr->rn;
                prev = &incr->list;
        }
        if (!assigned)
                new_incr->rn = last_rn + 1;
        if (new_incr->rn > sclp.rnmax) {
                kfree(new_incr);
                return;
        }
        list_add(&new_incr->list, prev);
}

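/*
 * Read the storage information from the SCLP, build the increment list and
 * create the sysfs interface. Standby memory is not supported in kdump mode
 * or when the required facilities are not installed.
 */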
static int __init sclp_setup_memory(void)
{
        struct read_storage_sccb *sccb;
        int i, id, assigned, rc;

        /* No standby memory in kdump mode */
        if (oldmem_data.start)
                return 0;
        if ((sclp.facilities & 0xe00000000000UL) != 0xe00000000000UL)
                return 0;
        rc = -ENOMEM;
        sccb = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
        if (!sccb)
                goto out;
        assigned = 0;
        for (id = 0; id <= sclp_max_storage_id; id++) {
                memset(sccb, 0, PAGE_SIZE);
                sccb->header.length = PAGE_SIZE;
                rc = sclp_sync_request(SCLP_CMDW_READ_STORAGE_INFO | id << 8, sccb);
                if (rc)
                        goto out;
                switch (sccb->header.response_code) {
                case 0x0010:
                        set_bit(id, sclp_storage_ids);
                        for (i = 0; i < sccb->assigned; i++) {
                                if (!sccb->entries[i])
                                        continue;
                                assigned++;
                                insert_increment(sccb->entries[i] >> 16, 0, 1);
                        }
                        break;
                case 0x0310:
                        break;
                case 0x0410:
                        for (i = 0; i < sccb->assigned; i++) {
                                if (!sccb->entries[i])
                                        continue;
                                assigned++;
                                insert_increment(sccb->entries[i] >> 16, 1, 1);
                        }
                        break;
                default:
                        rc = -EIO;
                        break;
                }
                if (!rc)
                        sclp_max_storage_id = sccb->max_id;
        }
        if (rc || list_empty(&sclp_mem_list))
                goto out;
        for (i = 1; i <= sclp.rnmax - assigned; i++)
                insert_increment(0, 1, 0);
        rc = sclp_init_mem();
out:
        free_page((unsigned long)sccb);
        return rc;
}
__initcall(sclp_setup_memory);