xref: /linux/arch/s390/kernel/fpu.c (revision 071bf69a0220253a44acb8b2a27f7a262b9a46bf)
1 /*
2  * In-kernel vector facility support functions
3  *
4  * Copyright IBM Corp. 2015
5  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
6  */
7 #include <linux/kernel.h>
8 #include <linux/cpu.h>
9 #include <linux/sched.h>
10 #include <asm/fpu/types.h>
11 #include <asm/fpu/api.h>
12 
13 /*
14  * Per-CPU mask of the FPU/vector register ranges (KERNEL_* flags) that
15  * kernel code is currently using on this CPU; zero means no kernel user.
16  */
17 static DEFINE_PER_CPU(u32, kernel_fpu_state);
18 
19 #define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
20 
21 
22 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
23 {
24 	if (!__this_cpu_read(kernel_fpu_state)) {
25 		/*
26 		 * Save user space FPU state and register contents.  Multiple
27 		 * calls because of interruptions do not matter and return
28 		 * immediately.  This also sets CIF_FPU to lazy restore FP/VX
29 		 * register contents when returning to user space.
30 		 */
31 		save_fpu_regs();
32 	}
33 
34 	/* Update flags to use the vector facility for KERNEL_FPR */
35 	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
36 		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
37 		flags &= ~KERNEL_FPR;
38 	}
39 
40 	/* Save and update current kernel VX state */
41 	state->mask = __this_cpu_read(kernel_fpu_state);
42 	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
43 
44 	/*
45 	 * If this is the first call to __kernel_fpu_begin(), no additional
46 	 * work is required.
47 	 */
48 	if (!(state->mask & KERNEL_FPU_STATE_MASK))
49 		return;
50 
51 	/*
52 	 * If KERNEL_FPR is still set, the vector facility is not available
53 	 * and, thus, save floating-point control and registers only.
54 	 */
55 	if (state->mask & KERNEL_FPR) {
56 		asm volatile("stfpc %0" : "=Q" (state->fpc));
57 		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
58 		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
59 		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
60 		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
61 		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
62 		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
63 		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
64 		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
65 		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
66 		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
67 		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
68 		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
69 		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
70 		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
71 		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
72 		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
73 		return;
74 	}
75 
76 	/*
77 	 * If this is a nested call to __kernel_fpu_begin(), check the saved
78 	 * state mask to save and later restore the vector registers that
79 	 * are already in use.	Let's start with checking floating-point
80 	 * controls.
81 	 */
82 	if (state->mask & KERNEL_FPC)
83 		asm volatile("stfpc %0" : "=m" (state->fpc));
84 
85 	/* Test and save vector registers */
86 	asm volatile (
87 		/*
88 		 * Test if any vector register must be saved and, if so,
89 		 * test if all register can be saved.
90 		 */
91 		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
92 		"	jz	20f\n"		/* no work -> done */
93 		"	la	1,%[vxrs]\n"	/* load save area */
94 		"	jo	18f\n"		/* -> save V0..V31 */
95 
96 		/*
97 		 * Test if V8..V23 can be saved at once... this speeds up
98 		 * for KERNEL_fpu_MID only. Otherwise continue to split the
99 		 * range of vector registers into two halves and test them
100 		 * separately.
101 		 */
102 		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
103 		"	jo	17f\n"		/* -> save V8..V23 */
104 
105 		/* Test and save the first half of 16 vector registers */
106 		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
107 		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
108 		"	jo	2f\n"		/* 11 -> save V0..V15 */
109 		"	brc	4,3f\n"		/* 01 -> save V0..V7  */
110 		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
111 
112 		/* Test and save the second half of 16 vector registers */
113 		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
114 		"	jo	19f\n"		/* 11 -> save V16..V31 */
115 		"	brc	4,11f\n"	/* 01 -> save V16..V23	*/
116 		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
117 		"	j	20f\n"		/* 00 -> done */
118 
119 		/*
120 		 * Below are the vstm combinations to save multiple vector
121 		 * registers at once.
122 		 */
123 		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
124 		"	j	10b\n"			/* -> VXR_HIGH */
125 		"3:	.word	0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */
126 		"	j	10b\n"			/* -> VXR_HIGH */
127 		"4:	.word	0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */
128 		"	j	10b\n"			/* -> VXR_HIGH */
129 		"\n"
130 		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
131 		"	j	20f\n"			/* -> done */
132 		"12:	.word	0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */
133 		"	j	20f\n"			/* -> done */
134 		"\n"
135 		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
136 		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
137 		"	j	1b\n"			/* -> VXR_LOW */
138 		"\n"
139 		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
140 		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
141 		"20:"
142 		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
143 		: [m] "d" (state->mask)
144 		: "1", "cc");
145 }
146 EXPORT_SYMBOL(__kernel_fpu_begin);
147 
148 void __kernel_fpu_end(struct kernel_fpu *state)
149 {
150 	/* Just update the per-CPU state if there is nothing to restore */
151 	if (!(state->mask & KERNEL_FPU_STATE_MASK))
152 		goto update_fpu_state;
153 
154 	/*
155 	 * If KERNEL_FPR is specified, the vector facility is not available
156 	 * and, thus, restore floating-point control and registers only.
157 	 */
158 	if (state->mask & KERNEL_FPR) {
159 		asm volatile("lfpc %0" : : "Q" (state->fpc));
160 		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
161 		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
162 		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
163 		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
164 		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
165 		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
166 		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
167 		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
168 		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
169 		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
170 		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
171 		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
172 		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
173 		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
174 		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
175 		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
176 		goto update_fpu_state;
177 	}
178 
179 	/* Test and restore floating-point controls */
180 	if (state->mask & KERNEL_FPC)
181 		asm volatile("lfpc %0" : : "Q" (state->fpc));
182 
183 	/* Test and restore (load) vector registers */
184 	asm volatile (
185 		/*
186 		 * Test if any vector registers must be loaded and, if so,
187 		 * test if all registers can be loaded at once.
188 		 */
189 		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
190 		"	jz	20f\n"		/* no work -> done */
191 		"	la	1,%[vxrs]\n"	/* load load area */
192 		"	jo	18f\n"		/* -> load V0..V31 */
193 
194 		/*
195 		 * Test if V8..V23 can be restored at once... this speeds up
196 		 * for KERNEL_VXR_MID only. Otherwise continue to split the
197 		 * range of vector registers into two halves and test them
198 		 * separately.
199 		 */
200 		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
201 		"	jo	17f\n"		/* -> load V8..V23 */
202 
203 		/* Test and load the first half of 16 vector registers */
204 		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
205 		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
206 		"	jo	2f\n"		/* 11 -> load V0..V15 */
207 		"	brc	4,3f\n"		/* 01 -> load V0..V7  */
208 		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
209 
210 		/* Test and load the second half of 16 vector registers */
211 		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
212 		"	jo	19f\n"		/* 11 -> load V16..V31 */
213 		"	brc	4,11f\n"	/* 01 -> load V16..V23	*/
214 		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
215 		"	j	20f\n"		/* 00 -> done */
216 
217 		/*
218 		 * Below are the vstm combinations to load multiple vector
219 		 * registers at once.
220 		 */
221 		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
222 		"	j	10b\n"			/* -> VXR_HIGH */
223 		"3:	.word	0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */
224 		"	j	10b\n"			/* -> VXR_HIGH */
225 		"4:	.word	0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */
226 		"	j	10b\n"			/* -> VXR_HIGH */
227 		"\n"
228 		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
229 		"	j	20f\n"			/* -> done */
230 		"12:	.word	0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */
231 		"	j	20f\n"			/* -> done */
232 		"\n"
233 		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
234 		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
235 		"	j	1b\n"			/* -> VXR_LOW */
236 		"\n"
237 		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
238 		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
239 		"20:"
240 		:
241 		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
242 		  [m] "d" (state->mask)
243 		: "1", "cc");
244 
245 update_fpu_state:
246 	/* Update current kernel VX state */
247 	__this_cpu_write(kernel_fpu_state, state->mask);
248 }
249 EXPORT_SYMBOL(__kernel_fpu_end);
250