xref: /titanic_51/usr/src/uts/sun4u/sunfire/io/ac_del.c (revision 381a2a9a387f449fab7d0c7e97c4184c26963abf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/obpdefs.h>
35 #include <sys/errno.h>
36 #include <sys/kmem.h>
37 #include <sys/vmem.h>
38 #include <sys/debug.h>
39 #include <sys/sysmacros.h>
40 #include <sys/machsystm.h>
41 #include <sys/machparam.h>
42 #include <sys/modctl.h>
43 #include <sys/fhc.h>
44 #include <sys/ac.h>
45 #include <sys/vm.h>
46 #include <sys/cpu_module.h>
47 #include <vm/seg_kmem.h>
48 #include <vm/hat_sfmmu.h>
49 #include <sys/mem_config.h>
50 #include <sys/mem_cage.h>
51 
/*
 * Converts a KPHYSM_* status value into the ac_err_t reported to the
 * caller through AC_ERR_SET(). Defined outside this file.
 */
extern ac_err_t ac_kpm_err_cvt(int);

/*
 * Debug switch: when non-zero, ac_del_memory() overwrites a successfully
 * deleted bank with a known pattern via ac_bank_write_pattern().
 */
int ac_del_clean = 0;

/*
 * Default timeout, in seconds, for delete.
 * Time is counted when no progress is being made.
 * ac_del_memory() uses this value when the request's argument is -1.
 */
static int ac_del_timeout = 60;

/* Step size used when walking and pattern-filling a deleted bank. */
#define	DEL_PAGESIZE	MMU_PAGESIZE
63 
/*
 * Bookkeeping for one in-flight memory delete request.
 *
 * Each instance is linked on ac_del_list for its whole lifetime.
 * del_comp() and del_to_scan() update the completion, timeout and
 * progress fields with ac_del_mutex held.
 */
struct del_status {
	struct del_status *next;	/* link on ac_del_list */
	memhandle_t	handle;		/* from kphysm_del_gethandle() */
	volatile int	its_done;	/* set to 1 by del_comp() */
	int		done_error;	/* status handed to del_comp() */
	kcondvar_t	ac_del_cv;	/* signalled by del_comp() */
	int		del_timeout;	/* no-progress limit; 0 disables scan */
	int		del_noprogress;	/* consecutive scans without progress */
	ac_err_t	cancel_code;	/* reason for cancel, 0 if none */
	timeout_id_t	to_id;		/* pending del_to_scan() id, or 0 */
	pgcnt_t		last_collected;	/* dstat.collected at previous scan */
};
/* Head of the list of outstanding delete requests. */
static struct del_status *ac_del_list;
/*
 * Held around ac_del_list manipulation and del_status updates.
 * NOTE(review): no mutex_init() for this lock is visible in this file;
 * confirm that zero-filled static storage is what is being relied upon.
 */
static kmutex_t ac_del_mutex;
78 
79 static struct del_status *
80 ac_del_alloc_status()
81 {
82 	struct del_status *dsp;
83 
84 	dsp = (struct del_status *)kmem_zalloc(sizeof (*dsp), KM_SLEEP);
85 	mutex_enter(&ac_del_mutex);
86 	dsp->next = ac_del_list;
87 	ac_del_list = dsp;
88 	mutex_exit(&ac_del_mutex);
89 
90 	return (dsp);
91 }
92 
93 static void
94 ac_del_free_status(struct del_status *dsp)
95 {
96 	struct del_status **dspp;
97 
98 	mutex_enter(&ac_del_mutex);
99 	dspp = &ac_del_list;
100 	while (*dspp != NULL) {
101 		if (*dspp == dsp)
102 			break;
103 		dspp = &(*dspp)->next;
104 	}
105 	ASSERT(*dspp == dsp);
106 	if (*dspp == dsp) {
107 		*dspp = dsp->next;
108 	}
109 	mutex_exit(&ac_del_mutex);
110 	kmem_free((void *)dsp, sizeof (*dsp));
111 }
112 
113 static void
114 del_comp(void *arg, int error)
115 {
116 	struct del_status *dsp;
117 
118 	dsp = (struct del_status *)arg;
119 	mutex_enter(&ac_del_mutex);
120 #ifdef DEBUG
121 	{
122 		struct del_status *adsp;
123 		for (adsp = ac_del_list; adsp != NULL; adsp = adsp->next) {
124 			if (adsp == dsp)
125 				break;
126 		}
127 		ASSERT(adsp != NULL);
128 	}
129 #endif /* DEBUG */
130 	dsp->its_done = 1;
131 	dsp->done_error = error;
132 	cv_signal(&dsp->ac_del_cv);
133 	mutex_exit(&ac_del_mutex);
134 }
135 
136 /*ARGSUSED*/
137 static void
138 del_to_scan(void *arg)
139 {
140 	struct del_status *dsp;
141 	int do_cancel;
142 	memdelstat_t dstat;
143 	int err;
144 
145 	dsp = arg;
146 
147 #ifdef DEBUG
148 	{
149 		struct del_status *adsp;
150 
151 		mutex_enter(&ac_del_mutex);
152 		for (adsp = ac_del_list; adsp != NULL; adsp = adsp->next) {
153 			if (adsp == dsp)
154 				break;
155 		}
156 		ASSERT(adsp != NULL);
157 		mutex_exit(&ac_del_mutex);
158 	}
159 #endif /* DEBUG */
160 	do_cancel = 0;
161 	err = kphysm_del_status(dsp->handle, &dstat);
162 	mutex_enter(&ac_del_mutex);
163 	if (dsp->its_done) {
164 		mutex_exit(&ac_del_mutex);
165 		return;
166 	}
167 	if ((err == KPHYSM_OK) &&
168 	    (dsp->last_collected != dstat.collected)) {
169 		dsp->del_noprogress = 0;
170 		dsp->last_collected = dstat.collected;
171 	} else {
172 		dsp->del_noprogress++;
173 		if (dsp->del_noprogress >= dsp->del_timeout) {
174 			if (dsp->cancel_code == 0)
175 				dsp->cancel_code = AC_ERR_TIMEOUT;
176 			do_cancel = 1;
177 		}
178 	}
179 	if (!do_cancel)
180 		dsp->to_id = timeout(del_to_scan, arg, hz);
181 	else
182 		dsp->to_id = 0;
183 	mutex_exit(&ac_del_mutex);
184 	if (do_cancel)
185 		(void) kphysm_del_cancel(dsp->handle);
186 }
187 
188 static void
189 del_to_start(struct del_status *dsp)
190 {
191 	if (dsp->del_timeout != 0)
192 		dsp->to_id = timeout(del_to_scan, dsp, hz);
193 }
194 
195 static void
196 del_to_stop(struct del_status *dsp)
197 {
198 	timeout_id_t tid;
199 
200 	while ((tid = dsp->to_id) != 0) {
201 		dsp->to_id = 0;
202 		mutex_exit(&ac_del_mutex);
203 		(void) untimeout(tid);
204 		mutex_enter(&ac_del_mutex);
205 	}
206 }
207 
208 static int
209 ac_del_bank_add_span(
210 	memhandle_t handle,
211 	ac_cfga_pkt_t *pkt)
212 {
213 	uint64_t		decode;
214 	uint64_t		base_pa;
215 	uint64_t		bank_size;
216 	pfn_t			base;
217 	pgcnt_t			npgs;
218 	int			errs;
219 	int			ret;
220 	struct ac_soft_state	*asp = pkt->softsp;
221 	uint_t			ilv;
222 
223 	/*
224 	 * Cannot delete interleaved banks at the moment.
225 	 */
226 	ilv = (pkt->bank == Bank0) ?
227 	    INTLV0(*asp->ac_memctl) : INTLV1(*asp->ac_memctl);
228 	if (ilv != 1) {
229 		AC_ERR_SET(pkt, AC_ERR_MEM_DEINTLV);
230 		return (EINVAL);
231 	}
232 	/*
233 	 * Determine the physical location of the selected bank
234 	 */
235 	decode = (pkt->bank == Bank0) ?
236 	    *asp->ac_memdecode0 : *asp->ac_memdecode1;
237 	base_pa = GRP_REALBASE(decode);
238 	bank_size = GRP_UK2SPAN(decode);
239 
240 	base = base_pa >> PAGESHIFT;
241 	npgs = bank_size >> PAGESHIFT;
242 
243 	/*
244 	 * Delete the pages from the cage growth list.
245 	 */
246 	kcage_range_lock();
247 	ret = kcage_range_delete(base, npgs);
248 	kcage_range_unlock();
249 	if (ret != 0) {
250 		/* TODO: Should this be a separate error? */
251 		AC_ERR_SET(pkt, AC_ERR_KPM_NONRELOC);
252 		return (EINVAL);
253 	}
254 
255 	/*
256 	 * Add to delete memory list.
257 	 */
258 
259 	if ((errs = kphysm_del_span(handle, base, npgs)) != KPHYSM_OK) {
260 		AC_ERR_SET(pkt, ac_kpm_err_cvt(errs));
261 		/*
262 		 * Restore the pages to the cage growth list.
263 		 * TODO: We should not unconditionally add back
264 		 * if we conditionally add at memory add time.
265 		 */
266 		kcage_range_lock();
267 		errs = kcage_range_add(base, npgs, 1);
268 		/* TODO: deal with error return. */
269 		if (errs != 0) {
270 			AC_ERR_SET(pkt, ac_kpm_err_cvt(errs));
271 			cmn_err(CE_NOTE, "ac_del_bank_add_span(): "
272 			    "board %d, bank %d, "
273 			    "kcage_range_add() returned %d",
274 			    pkt->softsp->board, pkt->bank, errs);
275 		}
276 		kcage_range_unlock();
277 		return (EINVAL);
278 	}
279 	return (0);
280 }
281 
282 static void
283 ac_del_bank_add_cage(
284 	struct bd_list *del,
285 	enum ac_bank_id bank)
286 {
287 	uint64_t		decode;
288 	uint64_t		base_pa;
289 	uint64_t		bank_size;
290 	pfn_t			base;
291 	pgcnt_t			npgs;
292 	int			errs;
293 	struct ac_soft_state	*asp = (struct ac_soft_state *)(del->ac_softsp);
294 
295 	/*
296 	 * Determine the physical location of the selected bank
297 	 */
298 	decode = (bank == Bank0) ? *asp->ac_memdecode0 : *asp->ac_memdecode1;
299 	base_pa = GRP_REALBASE(decode);
300 	bank_size = GRP_UK2SPAN(decode);
301 
302 	base = base_pa >> PAGESHIFT;
303 	npgs = bank_size >> PAGESHIFT;
304 
305 	/*
306 	 * Restore the pages to the cage growth list.
307 	 * TODO: We should not unconditionally add back
308 	 * if we conditionally add at memory add time.
309 	 */
310 	kcage_range_lock();
311 	errs = kcage_range_add(base, npgs, 1);
312 	/* TODO: deal with error return. */
313 	if (errs != 0)
314 		cmn_err(CE_NOTE, "ac_del_bank_add_cage(): "
315 		    "board %d, bank %d, "
316 		    "kcage_range_add() returned %d",
317 		    del->sc.board, bank, errs);
318 	kcage_range_unlock();
319 }
320 
321 static int
322 ac_del_bank_run(struct del_status *dsp, ac_cfga_pkt_t *pkt)
323 {
324 	int errs;
325 
326 	dsp->its_done = 0;
327 	if ((errs = kphysm_del_start(dsp->handle, del_comp, (void *)dsp)) !=
328 	    KPHYSM_OK) {
329 		AC_ERR_SET(pkt, ac_kpm_err_cvt(errs));
330 		return (EINVAL);
331 	}
332 	/* Wait for it to complete. */
333 	mutex_enter(&ac_del_mutex);
334 	del_to_start(dsp);
335 	while (!dsp->its_done) {
336 		if (!cv_wait_sig(&dsp->ac_del_cv, &ac_del_mutex)) {
337 			if (dsp->cancel_code == 0)
338 				dsp->cancel_code = AC_ERR_INTR;
339 			mutex_exit(&ac_del_mutex);
340 			errs = kphysm_del_cancel(dsp->handle);
341 			mutex_enter(&ac_del_mutex);
342 			if (errs != KPHYSM_OK) {
343 				ASSERT(errs == KPHYSM_ENOTRUNNING);
344 			}
345 			break;
346 		}
347 	}
348 	/*
349 	 * If the loop exited due to a signal, we must continue to wait
350 	 * using cv_wait() as the signal is pending until syscall exit.
351 	 */
352 	while (!dsp->its_done) {
353 		cv_wait(&dsp->ac_del_cv, &ac_del_mutex);
354 	}
355 	if (dsp->done_error != KPHYSM_OK) {
356 		AC_ERR_SET(pkt, ac_kpm_err_cvt(dsp->done_error));
357 		if ((dsp->done_error == KPHYSM_ECANCELLED) ||
358 		    (dsp->done_error == KPHYSM_EREFUSED)) {
359 			errs = EINTR;
360 			if (dsp->cancel_code != 0) {
361 				AC_ERR_SET(pkt, dsp->cancel_code);
362 			}
363 		} else {
364 			errs = EINVAL;
365 		}
366 	} else
367 		errs = 0;
368 	del_to_stop(dsp);
369 	mutex_exit(&ac_del_mutex);
370 
371 	return (errs);
372 }
373 
374 
375 /*
376  * set the memory to known state for debugging
377  */
378 static void
379 ac_bank_write_pattern(struct bd_list *del, enum ac_bank_id bank)
380 {
381 	uint64_t		decode;
382 	uint64_t		base_pa;
383 	uint64_t		limit_pa;
384 	uint64_t		bank_size;
385 	uint64_t		current_pa;
386 	caddr_t			base_va;
387 	caddr_t			fill_buf;
388 	struct ac_soft_state	*asp = (struct ac_soft_state *)(del->ac_softsp);
389 	int			linesize;
390 
391 	/*
392 	 * Determine the physical location of the selected bank
393 	 */
394 	decode = (bank == Bank0) ? *asp->ac_memdecode0 : *asp->ac_memdecode1;
395 	base_pa = GRP_REALBASE(decode);
396 	bank_size = GRP_UK2SPAN(decode);
397 	limit_pa = base_pa + bank_size;
398 	linesize = cpunodes[CPU->cpu_id].ecache_linesize;
399 
400 	/*
401 	 * We need a page_va and a fill buffer for this operation
402 	 */
403 	base_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
404 	fill_buf = kmem_zalloc(DEL_PAGESIZE, KM_SLEEP);
405 	{
406 		typedef uint32_t patt_t;
407 		patt_t *bf, *bfe, patt;
408 
409 		bf = (patt_t *)fill_buf;
410 		bfe = (patt_t *)((char *)fill_buf + DEL_PAGESIZE);
411 		patt = 0xbeaddeed;
412 		while (bf < bfe)
413 			*bf++ = patt;
414 	}
415 
416 	/*
417 	 * 'empty' the memory
418 	 */
419 	kpreempt_disable();
420 	for (current_pa = base_pa; current_pa < limit_pa;
421 	    current_pa += DEL_PAGESIZE) {
422 
423 		/* map current pa */
424 		ac_mapin(current_pa, base_va);
425 
426 		/* fill the target page */
427 		ac_blkcopy(fill_buf, base_va,
428 			DEL_PAGESIZE/linesize, linesize);
429 
430 		/* tear down translation */
431 		ac_unmap(base_va);
432 	}
433 	kpreempt_enable();
434 
435 	/*
436 	 * clean up temporary resources
437 	 */
438 	{
439 		/* Distinguish the fill buf from memory deleted! */
440 		typedef uint32_t patt_t;
441 		patt_t *bf, *bfe, patt;
442 
443 		bf = (patt_t *)fill_buf;
444 		bfe = (patt_t *)((char *)fill_buf + DEL_PAGESIZE);
445 		patt = 0xbeadfeed;
446 		while (bf < bfe)
447 			*bf++ = patt;
448 	}
449 	kmem_free(fill_buf, DEL_PAGESIZE);
450 	vmem_free(heap_arena, base_va, PAGESIZE);
451 }
452 
453 int
454 ac_del_memory(ac_cfga_pkt_t *pkt)
455 {
456 	struct bd_list *board;
457 	struct ac_mem_info *mem_info;
458 	int busy_set;
459 	struct del_status *dsp;
460 	memdelstat_t dstat;
461 	int retval;
462 	int r_errs;
463 	struct ac_soft_state *asp;
464 
465 	if (!kcage_on) {
466 		static int cage_msg_done = 0;
467 
468 		if (!cage_msg_done) {
469 			cage_msg_done = 1;
470 			cmn_err(CE_NOTE, "ac: memory delete"
471 			    " refused: cage is off");
472 		}
473 		AC_ERR_SET(pkt, ac_kpm_err_cvt(KPHYSM_ENONRELOC));
474 		return (EINVAL);
475 	}
476 
477 	dsp = ac_del_alloc_status();
478 	if ((retval = kphysm_del_gethandle(&dsp->handle)) != KPHYSM_OK) {
479 		ac_del_free_status(dsp);
480 		AC_ERR_SET(pkt, ac_kpm_err_cvt(retval));
481 		return (EINVAL);
482 	}
483 	retval = 0;
484 	busy_set = 0;
485 
486 	board = fhc_bdlist_lock(pkt->softsp->board);
487 	if (board == NULL || board->ac_softsp == NULL) {
488 		fhc_bdlist_unlock();
489 		AC_ERR_SET(pkt, AC_ERR_BD);
490 		retval = EINVAL;
491 		goto out;
492 	}
493 	ASSERT(pkt->softsp == board->ac_softsp);
494 	asp = pkt->softsp;
495 
496 	/* verify the board is of the correct type */
497 	switch (board->sc.type) {
498 	case CPU_BOARD:
499 	case MEM_BOARD:
500 		break;
501 	default:
502 		fhc_bdlist_unlock();
503 		AC_ERR_SET(pkt, AC_ERR_BD_TYPE);
504 		retval = EINVAL;
505 		goto out;
506 	}
507 
508 	/* verify the memory condition is acceptable */
509 	mem_info = &asp->bank[pkt->bank];
510 	if (!MEM_BOARD_VISIBLE(board) || mem_info->busy ||
511 	    fhc_bd_busy(pkt->softsp->board) ||
512 	    mem_info->rstate != SYSC_CFGA_RSTATE_CONNECTED ||
513 	    mem_info->ostate != SYSC_CFGA_OSTATE_CONFIGURED) {
514 		fhc_bdlist_unlock();
515 		AC_ERR_SET(pkt, AC_ERR_BD_STATE);
516 		retval = EINVAL;
517 		goto out;
518 	}
519 
520 	if ((dsp->del_timeout = pkt->cmd_cfga.arg) == -1)
521 		dsp->del_timeout = ac_del_timeout;
522 
523 	/*
524 	 * at this point, we have an available bank to del.
525 	 * mark it busy and initiate the del function.
526 	 */
527 	mem_info->busy = TRUE;
528 	fhc_bdlist_unlock();
529 
530 	busy_set = 1;
531 
532 	retval = ac_del_bank_add_span(dsp->handle, pkt);
533 out:
534 	if (retval != 0) {
535 		r_errs = kphysm_del_release(dsp->handle);
536 		ASSERT(r_errs == KPHYSM_OK);
537 
538 		if (busy_set) {
539 			board = fhc_bdlist_lock(pkt->softsp->board);
540 			ASSERT(board != NULL && board->ac_softsp != NULL);
541 
542 			ASSERT(board->sc.type == CPU_BOARD ||
543 			    board->sc.type == MEM_BOARD);
544 			ASSERT(asp ==
545 			    (struct ac_soft_state *)(board->ac_softsp));
546 			mem_info = &asp->bank[pkt->bank];
547 			ASSERT(mem_info->busy != FALSE);
548 			ASSERT(mem_info->ostate == SYSC_CFGA_OSTATE_CONFIGURED);
549 			mem_info->busy = FALSE;
550 			fhc_bdlist_unlock();
551 		}
552 
553 		ac_del_free_status(dsp);
554 		return (retval);
555 	}
556 
557 	(void) kphysm_del_status(dsp->handle, &dstat);
558 
559 	retval = ac_del_bank_run(dsp, pkt);
560 
561 	r_errs = kphysm_del_release(dsp->handle);
562 	ASSERT(r_errs == KPHYSM_OK);
563 
564 	board = fhc_bdlist_lock(pkt->softsp->board);
565 	ASSERT(board != NULL && board->ac_softsp != NULL);
566 
567 	ASSERT(board->sc.type == CPU_BOARD || board->sc.type == MEM_BOARD);
568 	ASSERT(asp == (struct ac_soft_state *)(board->ac_softsp));
569 	mem_info = &asp->bank[pkt->bank];
570 	ASSERT(mem_info->busy != FALSE);
571 	ASSERT(mem_info->ostate == SYSC_CFGA_OSTATE_CONFIGURED);
572 	mem_info->busy = FALSE;
573 	if (retval == 0) {
574 		mem_info->ostate = SYSC_CFGA_OSTATE_UNCONFIGURED;
575 		mem_info->status_change = ddi_get_time();
576 
577 		if (ac_del_clean) {
578 			/* DEBUG - set memory to known state */
579 			ac_bank_write_pattern(board, pkt->bank);
580 		}
581 	} else {
582 		/*
583 		 * Restore the pages to the cage growth list.
584 		 */
585 		ac_del_bank_add_cage(board, pkt->bank);
586 	}
587 	fhc_bdlist_unlock();
588 
589 	ac_del_free_status(dsp);
590 
591 	return (retval);
592 }
593