/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * sun4v Memory DR Module
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>

#include <sys/modctl.h>
#include <sys/sysevent/dr.h>
#include <sys/mach_descrip.h>
#include <sys/mdesc.h>
#include <sys/ds.h>
#include <sys/drctl.h>
#include <sys/dr_util.h>
#include <sys/dr_mem.h>
#include <sys/suspend.h>

/*
 * DR operations are subject to memory alignment restrictions
 * on both the address and the size of the request.
 */
#define	MA_ADDR	0x10000000	/* addr alignment 256M */
#define	MA_SIZE	0x10000000	/* size alignment 256M */

#define	MBLK_IS_VALID(m) \
	(IS_P2ALIGNED((m)->addr, MA_ADDR) && IS_P2ALIGNED((m)->size, MA_SIZE))

static memhandle_t dr_mh;	/* memory handle for delete */

static struct modlmisc modlmisc = {
	&mod_miscops,
	"sun4v memory DR"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

static int dr_mem_allow_unload = 0;

typedef int (*fn_t)(dr_mem_blk_t *, int *);

/*
 * Global Domain Services (DS) Handle
 */
static ds_svc_hdl_t ds_handle;

/*
 * Supported DS Capability Versions
 */
static ds_ver_t		dr_mem_vers[] = { { 1, 0 } };
#define	DR_MEM_NVERS	(sizeof (dr_mem_vers) / sizeof (dr_mem_vers[0]))

/*
 * DS Capability Description
 */
static ds_capability_t dr_mem_cap = {
	DR_MEM_DS_ID,		/* svc_id */
	dr_mem_vers,		/* vers */
	DR_MEM_NVERS		/* nvers */
};

/*
 * DS Callbacks
 */
static void dr_mem_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void dr_mem_unreg_handler(ds_cb_arg_t arg);
static void dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * DS Client Ops Vector
 */
static ds_clnt_ops_t dr_mem_ops = {
	dr_mem_reg_handler,	/* ds_reg_cb */
	dr_mem_unreg_handler,	/* ds_unreg_cb */
	dr_mem_data_handler,	/* ds_data_cb */
	NULL			/* cb_arg */
};

/*
 * Operation Results
 *
 * Used internally to gather results while an operation on a
 * list of mblks is in progress. In particular, it is used to
 * keep track of which mblks have already failed so that they are
 * not processed further, and the manner in which they failed.
 */
typedef struct {
	uint64_t	addr;
	uint64_t	size;
	uint32_t	result;
	uint32_t	status;
	char		*string;
} dr_mem_res_t;

static char *
dr_mem_estr[] = {
	"operation succeeded",		/* DR_MEM_RES_OK */
	"operation failed",		/* DR_MEM_RES_FAILURE */
	"operation was blocked",	/* DR_MEM_RES_BLOCKED */
	"memory not defined in MD",	/* DR_MEM_RES_NOT_IN_MD */
	"memory already in use",	/* DR_MEM_RES_ESPAN */
	"memory access test failed",	/* DR_MEM_RES_EFAULT */
	"resource not available",	/* DR_MEM_RES_ERESOURCE */
	"permanent pages in span",	/* DR_MEM_RES_PERM */
	"memory span busy",		/* DR_MEM_RES_EBUSY */
	"VM viability test failed",	/* DR_MEM_RES_ENOTVIABLE */
	"no pages to unconfigure",	/* DR_MEM_RES_ENOWORK */
	"operation cancelled",		/* DR_MEM_RES_ECANCELLED */
	"operation refused",		/* DR_MEM_RES_EREFUSED */
	"memory span duplicate",	/* DR_MEM_RES_EDUP */
	"invalid argument"		/* DR_MEM_RES_EINVAL */
};

static char *
dr_mem_estr_detail[] = {
	"",					/* DR_MEM_SRES_NONE */
	"memory DR disabled after migration"	/* DR_MEM_SRES_OS_SUSPENDED */
};

typedef struct {
	kcondvar_t cond;
	kmutex_t lock;
	int error;
	int done;
} mem_sync_t;

/*
 * Internal Functions
 */
static int dr_mem_init(void);
static int dr_mem_fini(void);

static int dr_mem_list_wrk(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_list_query(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_stat(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_cancel(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);

static int dr_mem_unconfigure(dr_mem_blk_t *, int *);
static int dr_mem_configure(dr_mem_blk_t *, int *);
static void dr_mem_query(dr_mem_blk_t *, dr_mem_query_t *);

static dr_mem_res_t *dr_mem_res_array_init(dr_mem_hdr_t *, drctl_rsrc_t *, int);
static void dr_mem_res_array_fini(dr_mem_res_t *res, int nres);
static size_t dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res,
    dr_mem_hdr_t **respp);

static int dr_mem_find(dr_mem_blk_t *mbp);
static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *, md_t *, mde_cookie_t *);

static int mem_add(pfn_t, pgcnt_t);
static int mem_del(pfn_t, pgcnt_t);

extern int kphysm_add_memory_dynamic(pfn_t, pgcnt_t);

int
_init(void)
{
	int status;

	/* check that Memory DR is enabled */
	if (dr_is_disabled(DR_TYPE_MEM))
		return (ENOTSUP);

	if ((status = dr_mem_init()) != 0) {
		cmn_err(CE_NOTE, "Memory DR initialization failed");
		return (status);
	}

	if ((status = mod_install(&modlinkage)) != 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int status;

	if (dr_mem_allow_unload == 0)
		return (EBUSY);

	if ((status = mod_remove(&modlinkage)) == 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

static int
dr_mem_init(void)
{
	int rv;

	if ((rv = ds_cap_init(&dr_mem_cap, &dr_mem_ops)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_init failed: %d", rv);
		return (rv);
	}

	return (0);
}

static int
dr_mem_fini(void)
{
	int rv;

	if ((rv = ds_cap_fini(&dr_mem_cap)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_fini failed: %d", rv);
	}

	return (rv);
}

static void
dr_mem_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
{
	DR_DBG_MEM("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
	    ver->major, ver->minor, hdl);

	ds_handle = hdl;
}

static void
dr_mem_unreg_handler(ds_cb_arg_t arg)
{
	DR_DBG_MEM("unreg_handler: arg=0x%p\n", arg);

	ds_handle = DS_INVALID_HDL;
}

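/*
 * DS data handler. Validates the incoming request, dispatches it to
 * the routine that implements the requested operation, and sends the
 * response back to the service entity. If anything fails before a
 * response has been built, a DR_MEM_ERROR message is returned from
 * the local err_resp buffer instead.
 */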
/*ARGSUSED*/
static void
dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
{
	dr_mem_hdr_t *req = buf;
	dr_mem_hdr_t err_resp;
	dr_mem_hdr_t *resp = &err_resp;
	int resp_len = 0;
	int rv = EINVAL;

	/*
	 * Sanity check the message
	 */
	if (buflen < sizeof (dr_mem_hdr_t)) {
		DR_DBG_MEM("incoming message short: expected at least %ld "
		    "bytes, received %ld\n", sizeof (dr_mem_hdr_t), buflen);
		goto done;
	}

	if (req == NULL) {
		DR_DBG_MEM("empty message: expected at least %ld bytes\n",
		    sizeof (dr_mem_hdr_t));
		goto done;
	}

	DR_DBG_MEM("incoming request:\n");
	DR_DBG_DUMP_MSG(buf, buflen);

	/*
	 * Process the command
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
	case DR_MEM_UNCONFIGURE:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_wrk(req, &resp, &resp_len)) != 0) {
			DR_DBG_MEM("%s failed (%d)\n",
			    (req->msg_type == DR_MEM_CONFIGURE) ?
			    "Memory configure" : "Memory unconfigure", rv);
		}
		break;

	case DR_MEM_UNCONF_STATUS:
		if ((rv = dr_mem_del_stat(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete status failed (%d)\n", rv);
		break;

	case DR_MEM_UNCONF_CANCEL:
		if ((rv = dr_mem_del_cancel(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete cancel failed (%d)\n", rv);
		break;

	case DR_MEM_QUERY:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_query(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory query failed (%d)\n", rv);
		break;

	default:
		cmn_err(CE_NOTE, "unsupported memory DR operation (%d)",
		    req->msg_type);
		break;
	}

done:
	/* check if an error occurred */
	if (resp == &err_resp) {
		resp->req_num = (req) ? req->req_num : 0;
		resp->msg_type = DR_MEM_ERROR;
		resp->msg_arg = rv;
		resp_len = sizeof (dr_mem_hdr_t);
	}

	DR_DBG_MEM("outgoing response:\n");
	DR_DBG_DUMP_MSG(resp, resp_len);

	/* send back the response */
	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
		DR_DBG_MEM("ds_send failed\n");
	}

	/* free any allocated memory */
	if (resp != &err_resp) {
		kmem_free(resp, resp_len);
	}
}

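/*
 * Build the error string for a result/subresult pair. The returned
 * string is always allocated and must be freed by the caller (see
 * dr_mem_res_array_fini()).
 */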
static char *
dr_mem_get_errstr(int result, int subresult)
{
	size_t len;
	char *errstr;
	const char *separator = ": ";

	if (subresult == DR_MEM_SRES_NONE)
		return (i_ddi_strdup(dr_mem_estr[result], KM_SLEEP));

	len = snprintf(NULL, 0, "%s%s%s", dr_mem_estr[result],
	    separator, dr_mem_estr_detail[subresult]) + 1;

	errstr = kmem_alloc(len, KM_SLEEP);

	(void) snprintf(errstr, len, "%s%s%s", dr_mem_estr[result],
	    separator, dr_mem_estr_detail[subresult]);

	return (errstr);
}

/*
 * Common routine to config or unconfig multiple mblks.
 *
 * Note: Do not modify result buffer or length on error.
 */
static int
dr_mem_list_wrk(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int rv;
	int idx;
	int count;
	int result;
	int subresult;
	int status;
	boolean_t suspend_allows_dr;
	fn_t dr_fn;
	int se_hint;
	dr_mem_blk_t *req_mblks;
	dr_mem_res_t *res;
	int drctl_cmd;
	int drctl_flags = 0;
	drctl_rsrc_t *drctl_req;
	size_t drctl_req_len;
	drctl_resp_t *drctl_resp;
	drctl_rsrc_t *drctl_rsrc;
	size_t drctl_resp_len = 0;
	drctl_cookie_t drctl_res_ck;

	ASSERT((req != NULL) && (req->msg_arg != 0));

	count = req->msg_arg;

	/*
	 * Extract all information that is specific
	 * to the various types of operations.
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
		dr_fn = dr_mem_configure;
		drctl_cmd = DRCTL_MEM_CONFIG_REQUEST;
		se_hint = SE_HINT_INSERT;
		break;
	case DR_MEM_UNCONFIGURE:
		dr_fn = dr_mem_unconfigure;
		drctl_cmd = DRCTL_MEM_UNCONFIG_REQUEST;
		se_hint = SE_HINT_REMOVE;
		break;
	default:
		/* Programming error if we reach this. */
		cmn_err(CE_NOTE, "%s: bad msg_type %d\n",
		    __func__, req->msg_type);
		ASSERT(0);
		return (-1);
	}

	/* the incoming array of mblks to operate on */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate drctl request msg based on incoming resource count */
	drctl_req_len = sizeof (drctl_rsrc_t) * count;
	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);

	/* copy the size for the drctl call from the incoming request msg */
	for (idx = 0; idx < count; idx++) {
		drctl_req[idx].res_mem_addr = req_mblks[idx].addr;
		drctl_req[idx].res_mem_size = req_mblks[idx].size;
	}

	rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
	    count, &drctl_resp, &drctl_resp_len, &drctl_res_ck);

	ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0));

	if (rv != 0) {
		DR_DBG_MEM("%s: drctl_config_init returned: %d\n",
		    __func__, rv);
		kmem_free(drctl_resp, drctl_resp_len);
		kmem_free(drctl_req, drctl_req_len);
		return (rv);
	}

	ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK);

	drctl_rsrc = drctl_resp->resp_resources;

	/* create the result scratch array */
	res = dr_mem_res_array_init(req, drctl_rsrc, count);

	/*
	 * Memory DR operations are not safe if we have been suspended and
	 * resumed. Until this limitation is lifted, check to see if memory
	 * DR operations are permitted at this time by the suspend subsystem.
	 */
	if ((suspend_allows_dr = suspend_memdr_allowed()) == B_FALSE) {
		result = DR_MEM_RES_BLOCKED;
		subresult = DR_MEM_SRES_OS_SUSPENDED;
	} else {
		subresult = DR_MEM_SRES_NONE;
	}

	/* perform the specified operation on each of the mblks */
	for (idx = 0; idx < count; idx++) {
		/*
		 * If no action will be taken against the current
		 * mblk, update the drctl resource information to
		 * ensure that it gets recovered properly during
		 * the drctl fini() call.
		 */
		if (res[idx].result != DR_MEM_RES_OK) {
			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
			continue;
		}

		/*
		 * If memory DR operations are permitted at this time by
		 * the suspend subsystem, call the function to perform the
		 * operation, otherwise return a result indicating that the
		 * operation was blocked.
		 */
		if (suspend_allows_dr)
			result = (*dr_fn)(&req_mblks[idx], &status);

		/* save off results of the operation */
		res[idx].result = result;
		res[idx].status = status;
		res[idx].addr = req_mblks[idx].addr;	/* for partial case */
		res[idx].size = req_mblks[idx].size;	/* for partial case */
		res[idx].string = dr_mem_get_errstr(result, subresult);

		/* save result for drctl fini() reusing init() msg memory */
		drctl_req[idx].status = (result != DR_MEM_RES_OK) ?
		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;

		DR_DBG_MEM("%s: mblk 0x%lx.0x%lx stat %d result %d off '%s'\n",
		    __func__, req_mblks[idx].addr, req_mblks[idx].size,
		    drctl_req[idx].status, result,
		    (res[idx].string) ? res[idx].string : "");
	}

	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
		DR_DBG_MEM("%s: drctl_config_fini returned: %d\n",
		    __func__, rv);

	/*
	 * Operation completed without any fatal errors.
	 * Pack the response for transmission.
	 */
	*resp_len = dr_mem_pack_response(req, res, resp);

	/* notify interested parties about the operation */
	dr_generate_event(DR_TYPE_MEM, se_hint);

	/*
	 * Deallocate any scratch memory.
	 */
	kmem_free(drctl_resp, drctl_resp_len);
	kmem_free(drctl_req, drctl_req_len);

	dr_mem_res_array_fini(res, count);

	return (0);
}

/*
 * Allocate and initialize a result array based on the initial
 * drctl operation. A valid result array is always returned.
 */
static dr_mem_res_t *
dr_mem_res_array_init(dr_mem_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
{
	int idx;
	dr_mem_res_t *res;
	char *err_str;
	size_t err_len;

	/* allocate zero filled buffer to initialize fields */
	res = kmem_zalloc(nrsrc * sizeof (dr_mem_res_t), KM_SLEEP);

	/*
	 * Fill in the result information for each resource.
	 */
	for (idx = 0; idx < nrsrc; idx++) {
		res[idx].addr = rsrc[idx].res_mem_addr;
		res[idx].size = rsrc[idx].res_mem_size;
		res[idx].result = DR_MEM_RES_OK;

		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
			continue;

		/*
		 * Update the state information for this mblk.
		 */
		res[idx].result = DR_MEM_RES_BLOCKED;
		res[idx].status = (req->msg_type == DR_MEM_CONFIGURE) ?
		    DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED;

		/*
		 * If an error string exists, copy it out of the
		 * message buffer. This eliminates any dependency
		 * on the memory allocated for the message buffer
		 * itself.
		 */
		if (rsrc[idx].offset != NULL) {
			err_str = (char *)rsrc + rsrc[idx].offset;
			err_len = strlen(err_str) + 1;

			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
			bcopy(err_str, res[idx].string, err_len);
		}
	}

	return (res);
}

static void
dr_mem_res_array_fini(dr_mem_res_t *res, int nres)
{
	int idx;
	size_t str_len;

	for (idx = 0; idx < nres; idx++) {
		/* deallocate the error string if present */
		if (res[idx].string) {
			str_len = strlen(res[idx].string) + 1;
			kmem_free(res[idx].string, str_len);
		}
	}

	/* deallocate the result array itself */
	kmem_free(res, sizeof (dr_mem_res_t) * nres);
}

/*
 * Allocate and pack a response message for transmission based
 * on the specified result array. A valid response message and
 * valid size information is always returned.
 */
static size_t
dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, dr_mem_hdr_t **respp)
{
	int idx;
	dr_mem_hdr_t *resp;
	dr_mem_stat_t *resp_stat;
	size_t resp_len;
	uint32_t curr_off;
	caddr_t curr_str;
	size_t str_len;
	size_t stat_len;
	int nstat = req->msg_arg;

	/*
	 * Calculate the size of the response message
	 * and allocate an appropriately sized buffer.
	 */
	resp_len = sizeof (dr_mem_hdr_t);

	/* add the stat array size */
	stat_len = sizeof (dr_mem_stat_t) * nstat;
	resp_len += stat_len;

	/* add the size of any error strings */
	for (idx = 0; idx < nstat; idx++) {
		if (res[idx].string != NULL) {
			resp_len += strlen(res[idx].string) + 1;
		}
	}

	/* allocate the message buffer */
	resp = kmem_zalloc(resp_len, KM_SLEEP);

	/*
	 * Fill in the header information.
	 */
	resp->req_num = req->req_num;
	resp->msg_type = DR_MEM_OK;
	resp->msg_arg = nstat;

	/*
	 * Fill in the stat information.
	 */
	resp_stat = DR_MEM_RESP_STATS(resp);

	/* string offsets start immediately after stat array */
	curr_off = sizeof (dr_mem_hdr_t) + stat_len;
	curr_str = (char *)resp_stat + stat_len;

	for (idx = 0; idx < nstat; idx++) {
		resp_stat[idx].addr = res[idx].addr;
		resp_stat[idx].size = res[idx].size;
		resp_stat[idx].result = res[idx].result;
		resp_stat[idx].status = res[idx].status;

		if (res[idx].string != NULL) {
			/* copy over the error string */
			str_len = strlen(res[idx].string) + 1;
			bcopy(res[idx].string, curr_str, str_len);
			resp_stat[idx].string_off = curr_off;

			curr_off += str_len;
			curr_str += str_len;
		}
	}

	/* buffer should be exactly filled */
	ASSERT(curr_off == resp_len);

	*respp = resp;
	return (resp_len);
}

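/*
 * Collect the delete statistics for a single mblk, converting the
 * page counts returned by the VM subsystem into byte counts. If the
 * span contains no physical pages, *mqp is left untouched.
 */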
static void
dr_mem_query(dr_mem_blk_t *mbp, dr_mem_query_t *mqp)
{
	memquery_t mq;

	DR_DBG_MEM("dr_mem_query...\n");

	(void) kphysm_del_span_query(btop(mbp->addr), btop(mbp->size), &mq);

	if (!mq.phys_pages)
		return;

	mqp->addr = mbp->addr;
	mqp->mq.phys_pages = ptob(mq.phys_pages);
	mqp->mq.managed = ptob(mq.managed);
	mqp->mq.nonrelocatable = ptob(mq.nonrelocatable);
	mqp->mq.first_nonrelocatable = ptob(mq.first_nonrelocatable);
	mqp->mq.last_nonrelocatable = ptob(mq.last_nonrelocatable);
	/*
	 * Set to the max byte offset within the page.
	 */
	if (mqp->mq.nonrelocatable)
		mqp->mq.last_nonrelocatable += PAGESIZE - 1;
}

/*
 * Do not modify result buffer or length on error.
 */
static int
dr_mem_list_query(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int idx;
	int rlen;
	int nml;
	struct memlist *ml;
	struct memlist *phys_copy = NULL;
	dr_mem_blk_t *req_mblks, mb;
	dr_mem_hdr_t *rp;
	dr_mem_query_t *stat;

	drctl_block();

	/* the incoming array of req_mblks to configure */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate a response message, should be freed by caller */
	nml = 0;
	rlen = sizeof (dr_mem_hdr_t);
	if (req_mblks->addr == NULL && req_mblks->size == 0) {
		/*
		 * The request is for the domain's full view of its memory:
		 * place a copy in phys_copy, then release the memlist lock.
		 */
		memlist_read_lock();
		phys_copy = dr_memlist_dup(phys_install);
		memlist_read_unlock();

		for (ml = phys_copy; ml; ml = ml->ml_next)
			nml++;

		rlen += nml * sizeof (dr_mem_query_t);
	} else {
		rlen += req->msg_arg * sizeof (dr_mem_query_t);
	}
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = nml ? nml : req->msg_arg;

	/* stat array for the response */
	stat = DR_MEM_RESP_QUERY(rp);

	/* get the status for each of the mblocks */
	if (nml) {
		for (idx = 0, ml = phys_copy; ml; ml = ml->ml_next, idx++) {
			mb.addr = ml->ml_address;
			mb.size = ml->ml_size;
			dr_mem_query(&mb, &stat[idx]);
		}
	} else {
		for (idx = 0; idx < req->msg_arg; idx++)
			dr_mem_query(&req_mblks[idx], &stat[idx]);
	}

	*resp = rp;
	*resp_len = rlen;
	if (phys_copy != NULL) {
		dr_memlist_delete(phys_copy);
	}
	drctl_unblock();

	return (0);
}

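/*
 * Convert a kphysm_* error code into the corresponding DR_MEM_RES_*
 * result code. Codes with no direct equivalent map to the generic
 * DR_MEM_RES_FAILURE.
 */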
static int
cvt_err(int err)
{
	int rv;

	switch (err) {
	case KPHYSM_OK:
		rv = DR_MEM_RES_OK;
		break;
	case KPHYSM_ESPAN:
		rv = DR_MEM_RES_ESPAN;
		break;
	case KPHYSM_EFAULT:
		rv = DR_MEM_RES_EFAULT;
		break;
	case KPHYSM_ERESOURCE:
		rv = DR_MEM_RES_ERESOURCE;
		break;
	case KPHYSM_ENOTSUP:
	case KPHYSM_ENOHANDLES:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENONRELOC:
		rv = DR_MEM_RES_PERM;
		break;
	case KPHYSM_EHANDLE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EBUSY:
		rv = DR_MEM_RES_EBUSY;
		break;
	case KPHYSM_ENOTVIABLE:
		rv = DR_MEM_RES_ENOTVIABLE;
		break;
	case KPHYSM_ESEQUENCE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENOWORK:
		rv = DR_MEM_RES_ENOWORK;
		break;
	case KPHYSM_ECANCELLED:
		rv = DR_MEM_RES_ECANCELLED;
		break;
	case KPHYSM_EREFUSED:
		rv = DR_MEM_RES_EREFUSED;
		break;
	case KPHYSM_ENOTFINISHED:
	case KPHYSM_ENOTRUNNING:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EDUP:
		rv = DR_MEM_RES_EDUP;
		break;
	default:
		rv = DR_MEM_RES_FAILURE;
		break;
	}

	return (rv);
}

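/*
 * Configure a single mblk: verify its alignment and its presence in
 * the MD, then add its pages to the system. Returns a DR_MEM_RES_*
 * code and sets *status to the resulting DR_MEM_STAT_* state.
 */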
static int
dr_mem_configure(dr_mem_blk_t *mbp, int *status)
{
	int rv;
	uint64_t addr, size;

	rv = 0;
	addr = mbp->addr;
	size = mbp->size;

	DR_DBG_MEM("dr_mem_configure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", addr, size);
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = dr_mem_find(mbp)) {
		DR_DBG_MEM("failed to find mblk 0x%lx.0x%lx (%d)\n",
		    addr, size, rv);
		if (rv == EINVAL) {
			*status = DR_MEM_STAT_NOT_PRESENT;
			rv = DR_MEM_RES_NOT_IN_MD;
		} else {
			*status = DR_MEM_STAT_UNCONFIGURED;
			rv = DR_MEM_RES_FAILURE;
		}
	} else {
		rv = mem_add(btop(addr), btop(size));
		DR_DBG_MEM("addr=0x%lx size=0x%lx rv=%d\n", addr, size, rv);
		if (rv) {
			*status = DR_MEM_STAT_UNCONFIGURED;
		} else {
			*status = DR_MEM_STAT_CONFIGURED;
		}
	}

	return (rv);
}

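/*
 * Unconfigure a single mblk: verify its alignment, then delete its
 * pages from the system. Returns a DR_MEM_RES_* code and sets
 * *status to the resulting DR_MEM_STAT_* state.
 */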
static int
dr_mem_unconfigure(dr_mem_blk_t *mbp, int *status)
{
	int rv;

	DR_DBG_MEM("dr_mem_unconfigure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n",
		    mbp->addr, mbp->size);
		*status = DR_MEM_STAT_CONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = mem_del(btop(mbp->addr), btop(mbp->size))) {
		*status = DR_MEM_STAT_CONFIGURED;
	} else {
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_OK;
		DR_DBG_MEM("mblk 0x%lx.0x%lx unconfigured\n",
		    mbp->addr, mbp->size);
	}
	return (rv);
}

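/*
 * Report the status of any memory delete currently in progress. The
 * response carries a memdelstat_t payload only if an active delete
 * was found; the caller is responsible for freeing the response.
 */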
static int
dr_mem_del_stat(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int status;
	int rlen;
	memdelstat_t del_stat, *stat;
	dr_mem_hdr_t *rp;

	/*
	 * If a mem delete is in progress, get its status.
	 */
	status = (dr_mh && (kphysm_del_status(dr_mh, &del_stat) == KPHYSM_OK));

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rlen += status * sizeof (memdelstat_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = status;

	if (status) {
		/* stat struct for the response */
		stat = DR_MEM_RESP_DEL_STAT(rp);
		stat->phys_pages = ptob(del_stat.phys_pages);
		stat->managed = ptob(del_stat.managed);
		stat->collected = ptob(del_stat.collected);
	}

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

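/*
 * Cancel any memory delete currently in progress. The result of the
 * cancellation is returned in the msg_arg field of the response,
 * which the caller is responsible for freeing.
 */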
static int
dr_mem_del_cancel(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int rlen;
	dr_mem_hdr_t *rp;

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = (dr_mh && kphysm_del_cancel(dr_mh) != KPHYSM_OK) ?
	    DR_MEM_RES_EINVAL : DR_MEM_RES_OK;

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

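/*
 * Check for the presence of the specified mblk in the MD. Returns 0
 * if the mblk is found, EINVAL if it is not, and -1 if the MD itself
 * cannot be accessed.
 */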
static int
dr_mem_find(dr_mem_blk_t *mbp)
{
	md_t *mdp = NULL;
	int num_nodes;
	int rv = 0;
	int listsz;
	mde_cookie_t *listp = NULL;
	mde_cookie_t memnode;
	char *found = "found";

	if ((mdp = md_get_handle()) == NULL) {
		DR_DBG_MEM("unable to initialize machine description\n");
		return (-1);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	memnode = dr_mem_find_node_md(mbp, mdp, listp);

	if (memnode == MDE_INVAL_ELEM_COOKIE) {
		rv = EINVAL;
		found = "not found";
	}

	DR_DBG_MEM("mblk 0x%lx.0x%lx %s\n", mbp->addr, mbp->size, found);

	kmem_free(listp, listsz);
	(void) md_fini_handle(mdp);

	return (rv);
}

/*
 * Look up a particular mblk in the MD. Returns the mde_cookie_t
 * representing that mblk if present, and MDE_INVAL_ELEM_COOKIE
 * otherwise. It is assumed the scratch array has already been
 * allocated so that it can accommodate the worst case scenario,
 * every node in the MD.
 */
static mde_cookie_t
dr_mem_find_node_md(dr_mem_blk_t *mbp, md_t *mdp, mde_cookie_t *listp)
{
	int idx;
	int nnodes;
	mde_cookie_t rootnode;
	uint64_t base_prop;
	uint64_t size_prop;
	mde_cookie_t result = MDE_INVAL_ELEM_COOKIE;

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Scan the DAG for all the mem nodes
	 */
	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "mblock"),
	    md_find_name(mdp, "fwd"), listp);

	if (nnodes < 0) {
		DR_DBG_MEM("Scan for mblks failed\n");
		return (result);
	}

	DR_DBG_MEM("dr_mem_find_node_md: found %d mblks in the MD\n", nnodes);

	/*
	 * Find the mblk of interest
	 */
	for (idx = 0; idx < nnodes; idx++) {

		if (md_get_prop_val(mdp, listp[idx], "base", &base_prop)) {
			DR_DBG_MEM("Missing 'base' property for mblk node %d\n",
			    idx);
			break;
		}

		if (md_get_prop_val(mdp, listp[idx], "size", &size_prop)) {
			DR_DBG_MEM("Missing 'size' property for mblk node %d\n",
			    idx);
			break;
		}

		if (base_prop <= mbp->addr &&
		    (base_prop + size_prop) >= (mbp->addr + mbp->size)) {
			/* found a match */
			DR_DBG_MEM("dr_mem_find_node_md: found mblk "
			    "0x%lx.0x%lx in MD\n", mbp->addr, mbp->size);
			result = listp[idx];
			break;
		}
	}

	if (result == MDE_INVAL_ELEM_COOKIE) {
		DR_DBG_MEM("mblk 0x%lx.0x%lx not in MD\n",
		    mbp->addr, mbp->size);
	}

	return (result);
}

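/*
 * Add a span of physical pages to the system and, on success, make
 * the new range available for kernel cage growth. Returns a
 * DR_MEM_RES_* code.
 */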
static int
mem_add(pfn_t base, pgcnt_t npgs)
{
	int rv, rc;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	rv = kphysm_add_memory_dynamic(base, npgs);
	DR_DBG_MEM("%s: kphysm_add(0x%lx, 0x%lx) = %d", __func__, base, npgs,
	    rv);
	if (rv == KPHYSM_OK) {
		if (rc = kcage_range_add(base, npgs, KCAGE_DOWN))
			cmn_err(CE_WARN, "kcage_range_add() = %d", rc);
	}
	rv = cvt_err(rv);
	return (rv);
}

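/*
 * Completion callback for kphysm_del_start(). Records the result of
 * the delete and wakes the thread waiting in mem_del().
 */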
static void
del_done(void *arg, int error)
{
	mem_sync_t *ms = arg;

	mutex_enter(&ms->lock);
	ms->error = error;
	ms->done = 1;
	cv_signal(&ms->cond);
	mutex_exit(&ms->lock);
}

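/*
 * Delete a span of physical pages from the system. The delete is
 * performed asynchronously by the VM subsystem; this routine waits
 * for it to complete, cancelling the operation if a signal is
 * pending. On failure, any ranges already removed from the kernel
 * cage growth list are restored. Returns a DR_MEM_RES_* code.
 */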
static int
mem_del(pfn_t base, pgcnt_t npgs)
{
	int rv, err, del_range = 0;
	int convert = 1;
	mem_sync_t ms;
	memquery_t mq;
	memhandle_t mh;
	struct memlist *ml;
	struct memlist *d_ml = NULL;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv);
		rv = cvt_err(rv);
		return (rv);
	}
	if ((rv = kphysm_del_span_query(base, npgs, &mq))
	    != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv);
		goto done;
	}
	if (mq.nonrelocatable) {
		DR_DBG_MEM("%s: non-reloc pages = %ld",
		    __func__, mq.nonrelocatable);
		rv = KPHYSM_ENONRELOC;
		goto done;
	}
	if (rv = kcage_range_delete(base, npgs)) {
		switch (rv) {
		case EBUSY:
			rv = DR_MEM_RES_ENOTVIABLE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0; /* conversion done */
		cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv);
		goto done;
	} else {
		del_range++;
	}
	if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv);
		goto done;
	}
	if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml))
	    != MEML_SPANOP_OK) {
		switch (rv) {
		case MEML_SPANOP_ESPAN:
			rv = DR_MEM_RES_ESPAN;
			break;
		case MEML_SPANOP_EALLOC:
			rv = DR_MEM_RES_ERESOURCE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0; /* conversion done */
		cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv);
		goto done;
	}

	DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs);

	bzero((void *) &ms, sizeof (ms));

	mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ms.cond, NULL, CV_DRIVER, NULL);
	mutex_enter(&ms.lock);

	if ((rv = kphysm_del_start(mh, del_done, (void *) &ms)) == KPHYSM_OK) {
		/*
		 * Since we've called drctl_config_init, we are the only
		 * DR ctl operation in progress. Set dr_mh to the
		 * delete memhandle for use by stat and cancel.
		 */
		ASSERT(dr_mh == NULL);
		dr_mh = mh;

		/*
		 * Wait for completion or interrupt.
		 */
		while (!ms.done) {
			if (cv_wait_sig(&ms.cond, &ms.lock) == 0) {
				/*
				 * There is a pending signal.
				 */
				(void) kphysm_del_cancel(mh);
				DR_DBG_MEM("%s: cancel", __func__);
				/*
				 * Wait for completion.
				 */
				while (!ms.done)
					cv_wait(&ms.cond, &ms.lock);
			}
		}
		dr_mh = NULL;
		rv = ms.error;
	} else {
		DR_DBG_MEM("%s: del_start() = %d", __func__, rv);
	}

	mutex_exit(&ms.lock);
	cv_destroy(&ms.cond);
	mutex_destroy(&ms.lock);

done:
	if (rv && del_range) {
		/*
		 * Add back the spans to the kcage growth list.
		 */
		for (ml = d_ml; ml; ml = ml->ml_next)
			if (err = kcage_range_add(btop(ml->ml_address),
			    btop(ml->ml_size), KCAGE_DOWN))
				cmn_err(CE_WARN, "kcage_range_add() = %d", err);
	}
	memlist_free_list(d_ml);

	if ((err = kphysm_del_release(mh)) != KPHYSM_OK)
		cmn_err(CE_WARN, "%s: del_release() = %d", __func__, err);
	if (convert)
		rv = cvt_err(rv);

	DR_DBG_MEM("%s: rv=%d", __func__, rv);

	return (rv);
}