xref: /titanic_44/usr/src/uts/sun4u/sunfire/io/ac_del.c (revision 85f5803819bea86c07827a9544494e4ad327d95d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/obpdefs.h>
35 #include <sys/errno.h>
36 #include <sys/kmem.h>
37 #include <sys/vmem.h>
38 #include <sys/debug.h>
39 #include <sys/sysmacros.h>
40 #include <sys/machsystm.h>
41 #include <sys/machparam.h>
42 #include <sys/modctl.h>
43 #include <sys/fhc.h>
44 #include <sys/ac.h>
45 #include <sys/vm.h>
46 #include <sys/cpu_module.h>
47 #include <vm/seg_kmem.h>
48 #include <vm/hat_sfmmu.h>
49 #include <sys/mem_config.h>
50 #include <sys/mem_cage.h>
51 
52 extern ac_err_t ac_kpm_err_cvt(int);
53 
54 int ac_del_clean = 0;
55 
56 /*
57  * Default timeout, in seconds, for delete.
58  * Time is counted when no progress is being made.
59  */
60 static int ac_del_timeout = 60;
61 
62 #define	DEL_PAGESIZE	MMU_PAGESIZE
63 
64 struct del_status {
65 	struct del_status *next;
66 	memhandle_t	handle;
67 	volatile int	its_done;
68 	int		done_error;
69 	kcondvar_t	ac_del_cv;
70 	int		del_timeout;
71 	int		del_noprogress;
72 	ac_err_t	cancel_code;
73 	timeout_id_t	to_id;
74 	pgcnt_t		last_collected;
75 };
76 static struct del_status *ac_del_list;
77 static kmutex_t ac_del_mutex;
78 
79 static struct del_status *
ac_del_alloc_status()80 ac_del_alloc_status()
81 {
82 	struct del_status *dsp;
83 
84 	dsp = (struct del_status *)kmem_zalloc(sizeof (*dsp), KM_SLEEP);
85 	mutex_enter(&ac_del_mutex);
86 	dsp->next = ac_del_list;
87 	ac_del_list = dsp;
88 	mutex_exit(&ac_del_mutex);
89 
90 	return (dsp);
91 }
92 
93 static void
ac_del_free_status(struct del_status * dsp)94 ac_del_free_status(struct del_status *dsp)
95 {
96 	struct del_status **dspp;
97 
98 	mutex_enter(&ac_del_mutex);
99 	dspp = &ac_del_list;
100 	while (*dspp != NULL) {
101 		if (*dspp == dsp)
102 			break;
103 		dspp = &(*dspp)->next;
104 	}
105 	ASSERT(*dspp == dsp);
106 	if (*dspp == dsp) {
107 		*dspp = dsp->next;
108 	}
109 	mutex_exit(&ac_del_mutex);
110 	kmem_free((void *)dsp, sizeof (*dsp));
111 }
112 
113 static void
del_comp(void * arg,int error)114 del_comp(void *arg, int error)
115 {
116 	struct del_status *dsp;
117 
118 	dsp = (struct del_status *)arg;
119 	mutex_enter(&ac_del_mutex);
120 #ifdef DEBUG
121 	{
122 		struct del_status *adsp;
123 		for (adsp = ac_del_list; adsp != NULL; adsp = adsp->next) {
124 			if (adsp == dsp)
125 				break;
126 		}
127 		ASSERT(adsp != NULL);
128 	}
129 #endif /* DEBUG */
130 	dsp->its_done = 1;
131 	dsp->done_error = error;
132 	cv_signal(&dsp->ac_del_cv);
133 	mutex_exit(&ac_del_mutex);
134 }
135 
136 /*ARGSUSED*/
137 static void
del_to_scan(void * arg)138 del_to_scan(void *arg)
139 {
140 	struct del_status *dsp;
141 	int do_cancel;
142 	memdelstat_t dstat;
143 	int err;
144 
145 	dsp = arg;
146 
147 #ifdef DEBUG
148 	{
149 		struct del_status *adsp;
150 
151 		mutex_enter(&ac_del_mutex);
152 		for (adsp = ac_del_list; adsp != NULL; adsp = adsp->next) {
153 			if (adsp == dsp)
154 				break;
155 		}
156 		ASSERT(adsp != NULL);
157 		mutex_exit(&ac_del_mutex);
158 	}
159 #endif /* DEBUG */
160 	do_cancel = 0;
161 	err = kphysm_del_status(dsp->handle, &dstat);
162 	mutex_enter(&ac_del_mutex);
163 	if (dsp->its_done) {
164 		mutex_exit(&ac_del_mutex);
165 		return;
166 	}
167 	if ((err == KPHYSM_OK) &&
168 	    (dsp->last_collected != dstat.collected)) {
169 		dsp->del_noprogress = 0;
170 		dsp->last_collected = dstat.collected;
171 	} else {
172 		dsp->del_noprogress++;
173 		if (dsp->del_noprogress >= dsp->del_timeout) {
174 			if (dsp->cancel_code == 0)
175 				dsp->cancel_code = AC_ERR_TIMEOUT;
176 			do_cancel = 1;
177 		}
178 	}
179 	if (!do_cancel)
180 		dsp->to_id = timeout(del_to_scan, arg, hz);
181 	else
182 		dsp->to_id = 0;
183 	mutex_exit(&ac_del_mutex);
184 	if (do_cancel)
185 		(void) kphysm_del_cancel(dsp->handle);
186 }
187 
188 static void
del_to_start(struct del_status * dsp)189 del_to_start(struct del_status *dsp)
190 {
191 	if (dsp->del_timeout != 0)
192 		dsp->to_id = timeout(del_to_scan, dsp, hz);
193 }
194 
195 static void
del_to_stop(struct del_status * dsp)196 del_to_stop(struct del_status *dsp)
197 {
198 	timeout_id_t tid;
199 
200 	while ((tid = dsp->to_id) != 0) {
201 		dsp->to_id = 0;
202 		mutex_exit(&ac_del_mutex);
203 		(void) untimeout(tid);
204 		mutex_enter(&ac_del_mutex);
205 	}
206 }
207 
208 static int
ac_del_bank_add_span(memhandle_t handle,ac_cfga_pkt_t * pkt)209 ac_del_bank_add_span(
210 	memhandle_t handle,
211 	ac_cfga_pkt_t *pkt)
212 {
213 	uint64_t		decode;
214 	uint64_t		base_pa;
215 	uint64_t		bank_size;
216 	pfn_t			base;
217 	pgcnt_t			npgs;
218 	int			errs;
219 	int			ret;
220 	struct ac_soft_state	*asp = pkt->softsp;
221 	uint_t			ilv;
222 
223 	/*
224 	 * Cannot delete interleaved banks at the moment.
225 	 */
226 	ilv = (pkt->bank == Bank0) ?
227 	    INTLV0(*asp->ac_memctl) : INTLV1(*asp->ac_memctl);
228 	if (ilv != 1) {
229 		AC_ERR_SET(pkt, AC_ERR_MEM_DEINTLV);
230 		return (EINVAL);
231 	}
232 	/*
233 	 * Determine the physical location of the selected bank
234 	 */
235 	decode = (pkt->bank == Bank0) ?
236 	    *asp->ac_memdecode0 : *asp->ac_memdecode1;
237 	base_pa = GRP_REALBASE(decode);
238 	bank_size = GRP_UK2SPAN(decode);
239 
240 	base = base_pa >> PAGESHIFT;
241 	npgs = bank_size >> PAGESHIFT;
242 
243 	/*
244 	 * Delete the pages from the cage growth list.
245 	 */
246 	ret = kcage_range_delete(base, npgs);
247 	if (ret != 0) {
248 		/* TODO: Should this be a separate error? */
249 		AC_ERR_SET(pkt, AC_ERR_KPM_NONRELOC);
250 		return (EINVAL);
251 	}
252 
253 	/*
254 	 * Add to delete memory list.
255 	 */
256 
257 	if ((errs = kphysm_del_span(handle, base, npgs)) != KPHYSM_OK) {
258 		AC_ERR_SET(pkt, ac_kpm_err_cvt(errs));
259 		/*
260 		 * Restore the pages to the cage growth list.
261 		 * TODO: We should not unconditionally add back
262 		 * if we conditionally add at memory add time.
263 		 */
264 		errs = kcage_range_add(base, npgs, KCAGE_DOWN);
265 		/* TODO: deal with error return. */
266 		if (errs != 0) {
267 			AC_ERR_SET(pkt, ac_kpm_err_cvt(errs));
268 			cmn_err(CE_NOTE, "ac_del_bank_add_span(): "
269 			    "board %d, bank %d, "
270 			    "kcage_range_add() returned %d",
271 			    pkt->softsp->board, pkt->bank, errs);
272 		}
273 		return (EINVAL);
274 	}
275 	return (0);
276 }
277 
278 static void
ac_del_bank_add_cage(struct bd_list * del,enum ac_bank_id bank)279 ac_del_bank_add_cage(
280 	struct bd_list *del,
281 	enum ac_bank_id bank)
282 {
283 	uint64_t		decode;
284 	uint64_t		base_pa;
285 	uint64_t		bank_size;
286 	pfn_t			base;
287 	pgcnt_t			npgs;
288 	int			errs;
289 	struct ac_soft_state	*asp = (struct ac_soft_state *)(del->ac_softsp);
290 
291 	/*
292 	 * Determine the physical location of the selected bank
293 	 */
294 	decode = (bank == Bank0) ? *asp->ac_memdecode0 : *asp->ac_memdecode1;
295 	base_pa = GRP_REALBASE(decode);
296 	bank_size = GRP_UK2SPAN(decode);
297 
298 	base = base_pa >> PAGESHIFT;
299 	npgs = bank_size >> PAGESHIFT;
300 
301 	/*
302 	 * Restore the pages to the cage growth list.
303 	 * TODO: We should not unconditionally add back
304 	 * if we conditionally add at memory add time.
305 	 */
306 	errs = kcage_range_add(base, npgs, KCAGE_DOWN);
307 	/* TODO: deal with error return. */
308 	if (errs != 0)
309 		cmn_err(CE_NOTE, "ac_del_bank_add_cage(): "
310 		    "board %d, bank %d, "
311 		    "kcage_range_add() returned %d",
312 		    del->sc.board, bank, errs);
313 }
314 
315 static int
ac_del_bank_run(struct del_status * dsp,ac_cfga_pkt_t * pkt)316 ac_del_bank_run(struct del_status *dsp, ac_cfga_pkt_t *pkt)
317 {
318 	int errs;
319 
320 	dsp->its_done = 0;
321 	if ((errs = kphysm_del_start(dsp->handle, del_comp, (void *)dsp)) !=
322 	    KPHYSM_OK) {
323 		AC_ERR_SET(pkt, ac_kpm_err_cvt(errs));
324 		return (EINVAL);
325 	}
326 	/* Wait for it to complete. */
327 	mutex_enter(&ac_del_mutex);
328 	del_to_start(dsp);
329 	while (!dsp->its_done) {
330 		if (!cv_wait_sig(&dsp->ac_del_cv, &ac_del_mutex)) {
331 			if (dsp->cancel_code == 0)
332 				dsp->cancel_code = AC_ERR_INTR;
333 			mutex_exit(&ac_del_mutex);
334 			errs = kphysm_del_cancel(dsp->handle);
335 			mutex_enter(&ac_del_mutex);
336 			if (errs != KPHYSM_OK) {
337 				ASSERT(errs == KPHYSM_ENOTRUNNING);
338 			}
339 			break;
340 		}
341 	}
342 	/*
343 	 * If the loop exited due to a signal, we must continue to wait
344 	 * using cv_wait() as the signal is pending until syscall exit.
345 	 */
346 	while (!dsp->its_done) {
347 		cv_wait(&dsp->ac_del_cv, &ac_del_mutex);
348 	}
349 	if (dsp->done_error != KPHYSM_OK) {
350 		AC_ERR_SET(pkt, ac_kpm_err_cvt(dsp->done_error));
351 		if ((dsp->done_error == KPHYSM_ECANCELLED) ||
352 		    (dsp->done_error == KPHYSM_EREFUSED)) {
353 			errs = EINTR;
354 			if (dsp->cancel_code != 0) {
355 				AC_ERR_SET(pkt, dsp->cancel_code);
356 			}
357 		} else {
358 			errs = EINVAL;
359 		}
360 	} else
361 		errs = 0;
362 	del_to_stop(dsp);
363 	mutex_exit(&ac_del_mutex);
364 
365 	return (errs);
366 }
367 
368 
369 /*
370  * set the memory to known state for debugging
371  */
372 static void
ac_bank_write_pattern(struct bd_list * del,enum ac_bank_id bank)373 ac_bank_write_pattern(struct bd_list *del, enum ac_bank_id bank)
374 {
375 	uint64_t		decode;
376 	uint64_t		base_pa;
377 	uint64_t		limit_pa;
378 	uint64_t		bank_size;
379 	uint64_t		current_pa;
380 	caddr_t			base_va;
381 	caddr_t			fill_buf;
382 	struct ac_soft_state	*asp = (struct ac_soft_state *)(del->ac_softsp);
383 	int			linesize;
384 
385 	/*
386 	 * Determine the physical location of the selected bank
387 	 */
388 	decode = (bank == Bank0) ? *asp->ac_memdecode0 : *asp->ac_memdecode1;
389 	base_pa = GRP_REALBASE(decode);
390 	bank_size = GRP_UK2SPAN(decode);
391 	limit_pa = base_pa + bank_size;
392 	linesize = cpunodes[CPU->cpu_id].ecache_linesize;
393 
394 	/*
395 	 * We need a page_va and a fill buffer for this operation
396 	 */
397 	base_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
398 	fill_buf = kmem_zalloc(DEL_PAGESIZE, KM_SLEEP);
399 	{
400 		typedef uint32_t patt_t;
401 		patt_t *bf, *bfe, patt;
402 
403 		bf = (patt_t *)fill_buf;
404 		bfe = (patt_t *)((char *)fill_buf + DEL_PAGESIZE);
405 		patt = 0xbeaddeed;
406 		while (bf < bfe)
407 			*bf++ = patt;
408 	}
409 
410 	/*
411 	 * 'empty' the memory
412 	 */
413 	kpreempt_disable();
414 	for (current_pa = base_pa; current_pa < limit_pa;
415 	    current_pa += DEL_PAGESIZE) {
416 
417 		/* map current pa */
418 		ac_mapin(current_pa, base_va);
419 
420 		/* fill the target page */
421 		ac_blkcopy(fill_buf, base_va,
422 			DEL_PAGESIZE/linesize, linesize);
423 
424 		/* tear down translation */
425 		ac_unmap(base_va);
426 	}
427 	kpreempt_enable();
428 
429 	/*
430 	 * clean up temporary resources
431 	 */
432 	{
433 		/* Distinguish the fill buf from memory deleted! */
434 		typedef uint32_t patt_t;
435 		patt_t *bf, *bfe, patt;
436 
437 		bf = (patt_t *)fill_buf;
438 		bfe = (patt_t *)((char *)fill_buf + DEL_PAGESIZE);
439 		patt = 0xbeadfeed;
440 		while (bf < bfe)
441 			*bf++ = patt;
442 	}
443 	kmem_free(fill_buf, DEL_PAGESIZE);
444 	vmem_free(heap_arena, base_va, PAGESIZE);
445 }
446 
447 int
ac_del_memory(ac_cfga_pkt_t * pkt)448 ac_del_memory(ac_cfga_pkt_t *pkt)
449 {
450 	struct bd_list *board;
451 	struct ac_mem_info *mem_info;
452 	int busy_set;
453 	struct del_status *dsp;
454 	memdelstat_t dstat;
455 	int retval;
456 	int r_errs;
457 	struct ac_soft_state *asp;
458 
459 	if (!kcage_on) {
460 		static int cage_msg_done = 0;
461 
462 		if (!cage_msg_done) {
463 			cage_msg_done = 1;
464 			cmn_err(CE_NOTE, "ac: memory delete"
465 			    " refused: cage is off");
466 		}
467 		AC_ERR_SET(pkt, ac_kpm_err_cvt(KPHYSM_ENONRELOC));
468 		return (EINVAL);
469 	}
470 
471 	dsp = ac_del_alloc_status();
472 	if ((retval = kphysm_del_gethandle(&dsp->handle)) != KPHYSM_OK) {
473 		ac_del_free_status(dsp);
474 		AC_ERR_SET(pkt, ac_kpm_err_cvt(retval));
475 		return (EINVAL);
476 	}
477 	retval = 0;
478 	busy_set = 0;
479 
480 	board = fhc_bdlist_lock(pkt->softsp->board);
481 	if (board == NULL || board->ac_softsp == NULL) {
482 		fhc_bdlist_unlock();
483 		AC_ERR_SET(pkt, AC_ERR_BD);
484 		retval = EINVAL;
485 		goto out;
486 	}
487 	ASSERT(pkt->softsp == board->ac_softsp);
488 	asp = pkt->softsp;
489 
490 	/* verify the board is of the correct type */
491 	switch (board->sc.type) {
492 	case CPU_BOARD:
493 	case MEM_BOARD:
494 		break;
495 	default:
496 		fhc_bdlist_unlock();
497 		AC_ERR_SET(pkt, AC_ERR_BD_TYPE);
498 		retval = EINVAL;
499 		goto out;
500 	}
501 
502 	/* verify the memory condition is acceptable */
503 	mem_info = &asp->bank[pkt->bank];
504 	if (!MEM_BOARD_VISIBLE(board) || mem_info->busy ||
505 	    fhc_bd_busy(pkt->softsp->board) ||
506 	    mem_info->rstate != SYSC_CFGA_RSTATE_CONNECTED ||
507 	    mem_info->ostate != SYSC_CFGA_OSTATE_CONFIGURED) {
508 		fhc_bdlist_unlock();
509 		AC_ERR_SET(pkt, AC_ERR_BD_STATE);
510 		retval = EINVAL;
511 		goto out;
512 	}
513 
514 	if ((dsp->del_timeout = pkt->cmd_cfga.arg) == -1)
515 		dsp->del_timeout = ac_del_timeout;
516 
517 	/*
518 	 * at this point, we have an available bank to del.
519 	 * mark it busy and initiate the del function.
520 	 */
521 	mem_info->busy = TRUE;
522 	fhc_bdlist_unlock();
523 
524 	busy_set = 1;
525 
526 	retval = ac_del_bank_add_span(dsp->handle, pkt);
527 out:
528 	if (retval != 0) {
529 		r_errs = kphysm_del_release(dsp->handle);
530 		ASSERT(r_errs == KPHYSM_OK);
531 
532 		if (busy_set) {
533 			board = fhc_bdlist_lock(pkt->softsp->board);
534 			ASSERT(board != NULL && board->ac_softsp != NULL);
535 
536 			ASSERT(board->sc.type == CPU_BOARD ||
537 			    board->sc.type == MEM_BOARD);
538 			ASSERT(asp ==
539 			    (struct ac_soft_state *)(board->ac_softsp));
540 			mem_info = &asp->bank[pkt->bank];
541 			ASSERT(mem_info->busy != FALSE);
542 			ASSERT(mem_info->ostate == SYSC_CFGA_OSTATE_CONFIGURED);
543 			mem_info->busy = FALSE;
544 			fhc_bdlist_unlock();
545 		}
546 
547 		ac_del_free_status(dsp);
548 		return (retval);
549 	}
550 
551 	(void) kphysm_del_status(dsp->handle, &dstat);
552 
553 	retval = ac_del_bank_run(dsp, pkt);
554 
555 	r_errs = kphysm_del_release(dsp->handle);
556 	ASSERT(r_errs == KPHYSM_OK);
557 
558 	board = fhc_bdlist_lock(pkt->softsp->board);
559 	ASSERT(board != NULL && board->ac_softsp != NULL);
560 
561 	ASSERT(board->sc.type == CPU_BOARD || board->sc.type == MEM_BOARD);
562 	ASSERT(asp == (struct ac_soft_state *)(board->ac_softsp));
563 	mem_info = &asp->bank[pkt->bank];
564 	ASSERT(mem_info->busy != FALSE);
565 	ASSERT(mem_info->ostate == SYSC_CFGA_OSTATE_CONFIGURED);
566 	mem_info->busy = FALSE;
567 	if (retval == 0) {
568 		mem_info->ostate = SYSC_CFGA_OSTATE_UNCONFIGURED;
569 		mem_info->status_change = ddi_get_time();
570 
571 		if (ac_del_clean) {
572 			/* DEBUG - set memory to known state */
573 			ac_bank_write_pattern(board, pkt->bank);
574 		}
575 	} else {
576 		/*
577 		 * Restore the pages to the cage growth list.
578 		 */
579 		ac_del_bank_add_cage(board, pkt->bank);
580 	}
581 	fhc_bdlist_unlock();
582 
583 	ac_del_free_status(dsp);
584 
585 	return (retval);
586 }
587