1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * In-kernel FPU support functions 4 * 5 * 6 * Consider these guidelines before using in-kernel FPU functions: 7 * 8 * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel 9 * use of floating-point or vector registers and instructions. 10 * 11 * 2. For kernel_fpu_begin(), specify the vector register range you want to 12 * use with the KERNEL_VXR_* constants. Consider these usage guidelines: 13 * 14 * a) If your function typically runs in process-context, use the lower 15 * half of the vector registers, for example, specify KERNEL_VXR_LOW. 16 * b) If your function typically runs in soft-irq or hard-irq context, 17 * prefer using the upper half of the vector registers, for example, 18 * specify KERNEL_VXR_HIGH. 19 * 20 * If you adhere to these guidelines, an interrupted process context 21 * does not require to save and restore vector registers because of 22 * disjoint register ranges. 23 * 24 * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions 25 * includes logic to save and restore up to 16 vector registers at once. 26 * 27 * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different 28 * struct kernel_fpu states. Vector registers that are in use by outer 29 * levels are saved and restored. You can minimize the save and restore 30 * effort by choosing disjoint vector register ranges. 31 * 32 * 5. To use vector floating-point instructions, specify the KERNEL_FPC 33 * flag to save and restore floating-point controls in addition to any 34 * vector register range. 35 * 36 * 6. To use floating-point registers and instructions only, specify the 37 * KERNEL_FPR flag. This flag triggers a save and restore of vector 38 * registers V0 to V15 and floating-point controls. 39 * 40 * Copyright IBM Corp. 2015 41 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 42 */ 43 44 #ifndef _ASM_S390_FPU_H 45 #define _ASM_S390_FPU_H 46 47 #include <linux/cpufeature.h> 48 #include <linux/processor.h> 49 #include <linux/preempt.h> 50 #include <linux/string.h> 51 #include <linux/sched.h> 52 #include <asm/sigcontext.h> 53 #include <asm/fpu-types.h> 54 #include <asm/fpu-insn.h> 55 56 enum { 57 KERNEL_FPC_BIT = 0, 58 KERNEL_VXR_V0V7_BIT, 59 KERNEL_VXR_V8V15_BIT, 60 KERNEL_VXR_V16V23_BIT, 61 KERNEL_VXR_V24V31_BIT, 62 }; 63 64 #define KERNEL_FPC BIT(KERNEL_FPC_BIT) 65 #define KERNEL_VXR_V0V7 BIT(KERNEL_VXR_V0V7_BIT) 66 #define KERNEL_VXR_V8V15 BIT(KERNEL_VXR_V8V15_BIT) 67 #define KERNEL_VXR_V16V23 BIT(KERNEL_VXR_V16V23_BIT) 68 #define KERNEL_VXR_V24V31 BIT(KERNEL_VXR_V24V31_BIT) 69 70 #define KERNEL_VXR_LOW (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15) 71 #define KERNEL_VXR_MID (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23) 72 #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31) 73 74 #define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) 75 #define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) 76 77 void load_fpu_state(struct fpu *state, int flags); 78 void save_fpu_state(struct fpu *state, int flags); 79 void __kernel_fpu_begin(struct kernel_fpu *state, int flags); 80 void __kernel_fpu_end(struct kernel_fpu *state, int flags); 81 82 static __always_inline void save_vx_regs(__vector128 *vxrs) 83 { 84 fpu_vstm(0, 15, &vxrs[0]); 85 fpu_vstm(16, 31, &vxrs[16]); 86 } 87 88 static __always_inline void load_vx_regs(__vector128 *vxrs) 89 { 90 fpu_vlm(0, 15, &vxrs[0]); 91 fpu_vlm(16, 31, &vxrs[16]); 92 } 93 94 static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset) 95 { 96 fpu_std(0, &fprs[0 * offset]); 97 fpu_std(1, &fprs[1 * offset]); 98 fpu_std(2, &fprs[2 * offset]); 99 fpu_std(3, &fprs[3 * offset]); 100 fpu_std(4, &fprs[4 * offset]); 101 fpu_std(5, &fprs[5 * offset]); 102 fpu_std(6, &fprs[6 * offset]); 103 fpu_std(7, &fprs[7 * offset]); 104 fpu_std(8, &fprs[8 * offset]); 105 fpu_std(9, &fprs[9 * offset]); 106 fpu_std(10, &fprs[10 * offset]); 107 fpu_std(11, &fprs[11 * offset]); 108 fpu_std(12, &fprs[12 * offset]); 109 fpu_std(13, &fprs[13 * offset]); 110 fpu_std(14, &fprs[14 * offset]); 111 fpu_std(15, &fprs[15 * offset]); 112 } 113 114 static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset) 115 { 116 fpu_ld(0, &fprs[0 * offset]); 117 fpu_ld(1, &fprs[1 * offset]); 118 fpu_ld(2, &fprs[2 * offset]); 119 fpu_ld(3, &fprs[3 * offset]); 120 fpu_ld(4, &fprs[4 * offset]); 121 fpu_ld(5, &fprs[5 * offset]); 122 fpu_ld(6, &fprs[6 * offset]); 123 fpu_ld(7, &fprs[7 * offset]); 124 fpu_ld(8, &fprs[8 * offset]); 125 fpu_ld(9, &fprs[9 * offset]); 126 fpu_ld(10, &fprs[10 * offset]); 127 fpu_ld(11, &fprs[11 * offset]); 128 fpu_ld(12, &fprs[12 * offset]); 129 fpu_ld(13, &fprs[13 * offset]); 130 fpu_ld(14, &fprs[14 * offset]); 131 fpu_ld(15, &fprs[15 * offset]); 132 } 133 134 static __always_inline void save_fp_regs(freg_t *fprs) 135 { 136 __save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t)); 137 } 138 139 static __always_inline void load_fp_regs(freg_t *fprs) 140 { 141 __load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t)); 142 } 143 144 static __always_inline void save_fp_regs_vx(__vector128 *vxrs) 145 { 146 freg_t *fprs = (freg_t *)&vxrs[0].high; 147 148 __save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); 149 } 150 151 static __always_inline void load_fp_regs_vx(__vector128 *vxrs) 152 { 153 freg_t *fprs = (freg_t *)&vxrs[0].high; 154 155 __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); 156 } 157 158 static inline void load_user_fpu_regs(void) 159 { 160 struct thread_struct *thread = ¤t->thread; 161 162 if (!thread->ufpu_flags) 163 return; 164 load_fpu_state(&thread->ufpu, thread->ufpu_flags); 165 thread->ufpu_flags = 0; 166 } 167 168 static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags) 169 { 170 save_fpu_state(&thread->ufpu, flags); 171 __atomic_or(flags, &thread->ufpu_flags); 172 } 173 174 static inline void save_user_fpu_regs(void) 175 { 176 struct thread_struct *thread = ¤t->thread; 177 int mask, flags; 178 179 mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags); 180 flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR); 181 if (flags) 182 __save_user_fpu_regs(thread, flags); 183 barrier(); 184 WRITE_ONCE(thread->kfpu_flags, mask); 185 } 186 187 static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags) 188 { 189 struct thread_struct *thread = ¤t->thread; 190 int mask, uflags; 191 192 mask = __atomic_or(flags, &thread->kfpu_flags); 193 state->hdr.mask = mask; 194 uflags = READ_ONCE(thread->ufpu_flags); 195 if ((uflags & flags) != flags) 196 __save_user_fpu_regs(thread, ~uflags & flags); 197 if (mask & flags) 198 __kernel_fpu_begin(state, flags); 199 } 200 201 static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags) 202 { 203 int mask = state->hdr.mask; 204 205 if (mask & flags) 206 __kernel_fpu_end(state, flags); 207 barrier(); 208 WRITE_ONCE(current->thread.kfpu_flags, mask); 209 } 210 211 void __kernel_fpu_invalid_size(void); 212 213 static __always_inline void kernel_fpu_check_size(int flags, unsigned int size) 214 { 215 unsigned int cnt = 0; 216 217 if (flags & KERNEL_VXR_V0V7) 218 cnt += 8; 219 if (flags & KERNEL_VXR_V8V15) 220 cnt += 8; 221 if (flags & KERNEL_VXR_V16V23) 222 cnt += 8; 223 if (flags & KERNEL_VXR_V24V31) 224 cnt += 8; 225 if (cnt != size) 226 __kernel_fpu_invalid_size(); 227 } 228 229 #define kernel_fpu_begin(state, flags) \ 230 { \ 231 typeof(state) s = (state); \ 232 int _flags = (flags); \ 233 \ 234 kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ 235 _kernel_fpu_begin((struct kernel_fpu *)s, _flags); \ 236 } 237 238 #define kernel_fpu_end(state, flags) \ 239 { \ 240 typeof(state) s = (state); \ 241 int _flags = (flags); \ 242 \ 243 kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ 244 _kernel_fpu_end((struct kernel_fpu *)s, _flags); \ 245 } 246 247 static inline void save_kernel_fpu_regs(struct thread_struct *thread) 248 { 249 if (!thread->kfpu_flags) 250 return; 251 save_fpu_state(&thread->kfpu, thread->kfpu_flags); 252 } 253 254 static inline void restore_kernel_fpu_regs(struct thread_struct *thread) 255 { 256 if (!thread->kfpu_flags) 257 return; 258 load_fpu_state(&thread->kfpu, thread->kfpu_flags); 259 } 260 261 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) 262 { 263 int i; 264 265 for (i = 0; i < __NUM_FPRS; i++) 266 fprs[i].ui = vxrs[i].high; 267 } 268 269 static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) 270 { 271 int i; 272 273 for (i = 0; i < __NUM_FPRS; i++) 274 vxrs[i].high = fprs[i].ui; 275 } 276 277 static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) 278 { 279 fpregs->pad = 0; 280 fpregs->fpc = fpu->fpc; 281 convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); 282 } 283 284 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) 285 { 286 fpu->fpc = fpregs->fpc; 287 convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); 288 } 289 290 #endif /* _ASM_S390_FPU_H */ 291