/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ARCH_S390_PERCPU__
#define __ARCH_S390_PERCPU__

#include <linux/preempt.h>
#include <asm/cmpxchg.h>
#include <asm/march.h>

/*
 * s390 uses its own implementation for per cpu data, the offset of
 * the cpu local data area is cached in the cpu's lowcore memory.
 */
#define __my_cpu_offset get_lowcore()->percpu_offset

/*
 * arch_raw_cpu_ptr - convert a percpu pointer to a pointer for this cpu
 * @_ptr: percpu pointer
 *
 * The pointer is converted to an unsigned long and the percpu offset, which
 * is stored in lowcore, is added to it with a single ag instruction. The
 * displacement of the percpu_offset field is passed as immediate operand;
 * the ALTERNATIVE selects the displacement relative to LOWCORE_ALT_ADDRESS
 * if MFEATURE_LOWCORE is set.
 *
 * The unnamed "m" input operand is not referenced by the template; it
 * presumably only tells the compiler that the lowcore percpu_offset field
 * is read by the inline assembly. The ag instruction is declared to
 * clobber the condition code ("cc").
 *
 * Evaluates to a __kernel pointer to the unqualified type of *(_ptr).
 */
#define arch_raw_cpu_ptr(_ptr)						\
({									\
	unsigned long lc_percpu, tcp_ptr__;				\
									\
	tcp_ptr__ = (__force unsigned long)(_ptr);			\
	lc_percpu = offsetof(struct lowcore, percpu_offset);		\
	asm_inline volatile(						\
		ALTERNATIVE("ag %[__ptr__],%[offzero](%%r0)\n",		\
			    "ag %[__ptr__],%[offalt](%%r0)\n",		\
			    ALT_FEATURE(MFEATURE_LOWCORE))		\
		: [__ptr__] "+d" (tcp_ptr__)				\
		: [offzero] "i" (lc_percpu),				\
		  [offalt] "i" (lc_percpu + LOWCORE_ALT_ADDRESS),	\
		  "m" (((struct lowcore *)0)->percpu_offset)		\
		: "cc");						\
	(TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__;		\
})

/*
 * We use a compare-and-swap loop since that uses less cpu cycles than
 * disabling and enabling interrupts like the generic variant would do.
 *
 * arch_this_cpu_to_op_simple - apply a binary C operator to a percpu variable
 * @pcp: percpu variable
 * @val: right hand side operand
 * @op:  C operator token (the users below pass +, & and |)
 *
 * With preemption disabled the current value is read once and then
 * "old op val" is installed with cmpxchg() until the cmpxchg() returns the
 * value the calculation was based on. Evaluates to the new value.
 *
 * NOTE(review): (val) is expanded inside the loop body and is therefore
 * evaluated again on every retry - arguments with side effects would be
 * affected.
 */
#define arch_this_cpu_to_op_simple(pcp, val, op)			\
({									\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ old__, new__, prev__;				\
	pcp_op_T__ *ptr__;						\
	preempt_disable_notrace();					\
	ptr__ = raw_cpu_ptr(&(pcp));					\
	prev__ = READ_ONCE(*ptr__);					\
	do {								\
		old__ = prev__;						\
		new__ = old__ op (val);					\
		prev__ = cmpxchg(ptr__, old__, new__);			\
	} while (prev__ != old__);					\
	preempt_enable_notrace();					\
	new__;								\
})

/* The _1 and _2 variants are defined unconditionally via the cmpxchg loop. */
#define this_cpu_add_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_and_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
#define this_cpu_and_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
#define this_cpu_or_1(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
#define this_cpu_or_2(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)

/*
 * Macros to be used for percpu code section based on atomic instructions.
 *
 * Avoid the need to use preempt_disable() / preempt_enable() pairs and the
 * conditional preempt_schedule_notrace() function calls which come with
 * this. The idea is that this_cpu operations based on atomic instructions are
 * guarded with mviy instructions:
 *
 * - The first mviy instruction writes the number of the register which
 *   contains the percpu variable address to lowcore. This also indicates
 *   that a percpu code section is executed.
 *
 * - The first instruction following the mviy instruction must be the ag
 *   instruction which adds the percpu offset to the percpu address register.
 *
 * - Afterwards the atomic percpu operation follows.
 *
 * - Then a second mviy instruction writes a zero to lowcore, which indicates
 *   the end of the percpu code section.
 *
 * - In case of an interrupt/exception/nmi the register number which was
 *   written to lowcore is copied to the exception frame (pt_regs), and a zero
 *   is written to lowcore.
 *
 * - On return to the previous context it is checked if a percpu code section
 *   was executed (saved register number not zero), and if the process was
 *   migrated to a different cpu. If the percpu offset was already added to
 *   the percpu address register (instruction address does _not_ point to the
 *   ag instruction) the content of the percpu address register is adjusted so
 *   it points to the percpu variable of the new cpu.
 *
 * Inline assemblies making use of this typically have a code sequence like:
 *
 *	MVIY_PERCPU(...)	<- start of percpu code section
 *	AG_ALT(...)		<- add percpu offset; must be the second instruction
 *	atomic_op		<- atomic op
 *	MVIY_ALT(...)		<- end of percpu code section
 */

/*
 * MVIY_PERCPU - emit the mviy instruction which starts a percpu code section
 * @disp:    displacement operand of the lowcore field
 * @dispalt: displacement operand used if MFEATURE_LOWCORE is set
 * @reg:     register operand which holds the percpu address
 *
 * The temporary assembler macro GEN_MVIY compares its reg argument textually
 * with %r0 ... %r15 (.irp / .ifc) and emits an mviy instruction with the
 * matching register number as immediate value. GEN_MVIY is removed again
 * with .purgem after it has been used, presumably so that every expansion
 * of MVIY_PERCPU can define it anew.
 */
#define MVIY_PERCPU(disp, dispalt, reg)					\
	".macro GEN_MVIY disp reg\n"					\
	".irp rs,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"		\
	"	.ifc \\reg,%%r\\rs\n"					\
	"		mviy	\\disp(%%r0),\\rs\n"			\
	"	.endif\n"						\
	".endr\n"							\
	".endm\n"							\
	ALTERNATIVE("GEN_MVIY " __stringify(disp) " " __stringify(reg) "\n", \
		    "GEN_MVIY " __stringify(dispalt) " " __stringify(reg) "\n", \
		    ALT_FEATURE(MFEATURE_LOWCORE))			\
	".purgem GEN_MVIY\n"

/*
 * MVIY_ALT - emit the mviy instruction which ends a percpu code section
 * @disp:    displacement operand of the lowcore field
 * @dispalt: displacement operand used if MFEATURE_LOWCORE is set
 *
 * Writes the immediate value zero to the given lowcore location.
 */
#define MVIY_ALT(disp, dispalt)						\
	ALTERNATIVE("	mviy	" disp "(%%r0),0\n",			\
		    "	mviy	" dispalt "(%%r0),0\n",			\
		    ALT_FEATURE(MFEATURE_LOWCORE))

/*
 * AG_ALT - emit the ag instruction which adds a lowcore value to a register
 * @disp:    displacement operand of the lowcore field
 * @dispalt: displacement operand used if MFEATURE_LOWCORE is set
 * @reg:     register operand the value is added to
 */
#define AG_ALT(disp, dispalt, reg)					\
	ALTERNATIVE("	ag	" reg ", " disp "(%%r0)\n",		\
		    "	ag	" reg ", " dispalt "(%%r0)\n",		\
		    ALT_FEATURE(MFEATURE_LOWCORE))

#ifndef MARCH_HAS_Z196_FEATURES

/*
 * Without MARCH_HAS_Z196_FEATURES the _4 and _8 variants are implemented
 * with the compare-and-swap loop as well.
 */
#define this_cpu_add_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)

#else /* MARCH_HAS_Z196_FEATURES */

/*
 * arch_this_cpu_add - add a value to a percpu variable, result is discarded
 * @pcp:    percpu variable
 * @val:    value to add; evaluated once and converted to the type of pcp
 * @op1:    mnemonic of the instruction which takes the value in a register
 *          and also stores the old value in old__ (unused here)
 * @op2:    mnemonic of the instruction which takes the value as immediate
 * @szcast: type val__ is cast to for the range check and the immediate
 *
 * If the value is a compile time constant in the range -128..127 the
 * immediate variant (op2) is used, otherwise the register variant (op1).
 * Both variants are emitted as percpu code section:
 * MVIY_PERCPU, AG_ALT, the operation itself, MVIY_ALT.
 *
 * Operands: ptr__ is a read-write early-clobber address register since
 * AG_ALT adds to it in place; the unnamed "+m" / "=m" / "m" operands are
 * not referenced by the template and presumably only describe the memory
 * which is accessed (the percpu variable and the lowcore percpu_register
 * and percpu_offset fields).
 *
 * NOTE(review): plain "asm volatile" is used here while the other macros
 * in this file use "asm_inline volatile" - confirm that this is intended.
 */
#define arch_this_cpu_add(pcp, val, op1, op2, szcast)			\
do {									\
	unsigned long lc_pcpr, lc_pcpo;					\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ val__ = (val);					\
	pcp_op_T__ old__, *ptr__;					\
									\
	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
	ptr__ = PERCPU_PTR(&(pcp));					\
	if (__builtin_constant_p(val__) &&				\
	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {		\
		asm volatile(						\
			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") \
			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]") \
			op2 "	0(%[ptr__]),%[val__]\n"			\
			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")	\
			: [ptr__] "+&a" (ptr__), "+m" (*ptr__),		\
			  "=m" (((struct lowcore *)0)->percpu_register)	\
			: [val__] "i" ((szcast)val__),			\
			  [disppcpr] "i" (lc_pcpr),			\
			  [disppcpo] "i" (lc_pcpo),			\
			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS), \
			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS), \
			  "m" (((struct lowcore *)0)->percpu_offset)	\
			: "cc");					\
	} else {							\
		asm volatile(						\
			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") \
			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]") \
			op1 "	%[old__],%[val__],0(%[ptr__])\n"	\
			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")	\
			: [old__] "=&d" (old__),			\
			  [ptr__] "+&a" (ptr__), "+m" (*ptr__),		\
			  "=m" (((struct lowcore *)0)->percpu_register)	\
			: [val__] "d" (val__),				\
			  [disppcpr] "i" (lc_pcpr),			\
			  [disppcpo] "i" (lc_pcpo),			\
			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS), \
			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS), \
			  "m" (((struct lowcore *)0)->percpu_offset)	\
			: "cc");					\
	}								\
} while (0)

#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)

/*
 * arch_this_cpu_add_return - add a value to a percpu variable
 * @pcp: percpu variable
 * @val: value to add; evaluated once and converted to the type of pcp
 * @op:  mnemonic of the instruction ("laa" / "laag" for the users below)
 *
 * The instruction is emitted within a percpu code section and stores the
 * previous content of the variable in old__. Evaluates to old__ + val__,
 * which is calculated in C after the inline assembly.
 */
#define arch_this_cpu_add_return(pcp, val, op)				\
({									\
	unsigned long lc_pcpr, lc_pcpo;					\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ val__ = (val);					\
	pcp_op_T__ old__, *ptr__;					\
									\
	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
	ptr__ = PERCPU_PTR(&(pcp));					\
	asm_inline volatile(						\
		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") \
		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
		op "	%[old__],%[val__],0(%[ptr__])\n"		\
		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
		: [old__] "=&d" (old__),				\
		  [ptr__] "+&a" (ptr__), "+m" (*ptr__),			\
		  "=m" (((struct lowcore *)0)->percpu_register)		\
		: [val__] "d" (val__),					\
		  [disppcpr] "i" (lc_pcpr),				\
		  [disppcpo] "i" (lc_pcpo),				\
		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
		  "m" (((struct lowcore *)0)->percpu_offset)		\
		: "cc");						\
	old__ + val__;							\
})

#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")

/*
 * arch_this_cpu_to_op - apply an instruction to a percpu variable
 * @pcp: percpu variable
 * @val: second operand; evaluated once and converted to the type of pcp
 * @op:  mnemonic of the instruction ("lan", "lang", "lao", "laog" for the
 *       users below)
 *
 * Same instruction sequence as arch_this_cpu_add_return(), but the old
 * value which is stored in old__ is not used and no value is returned.
 */
#define arch_this_cpu_to_op(pcp, val, op)				\
do {									\
	unsigned long lc_pcpr, lc_pcpo;					\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ val__ = (val);					\
	pcp_op_T__ old__, *ptr__;					\
									\
	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
	ptr__ = PERCPU_PTR(&(pcp));					\
	asm_inline volatile(						\
		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") \
		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
		op "	%[old__],%[val__],0(%[ptr__])\n"		\
		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
		: [old__] "=&d" (old__),				\
		  [ptr__] "+&a" (ptr__), "+m" (*ptr__),			\
		  "=m" (((struct lowcore *)0)->percpu_register)		\
		: [val__] "d" (val__),					\
		  [disppcpr] "i" (lc_pcpr),				\
		  [disppcpo] "i" (lc_pcpo),				\
		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
		  "m" (((struct lowcore *)0)->percpu_offset)		\
		: "cc");						\
} while (0)

#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op(pcp, val, "lan")
#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op(pcp, val, "lang")
#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op(pcp, val, "lao")
#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op(pcp, val, "laog")

#endif /* MARCH_HAS_Z196_FEATURES */

/*
 * arch_this_cpu_read - read a percpu variable within a percpu code section
 * @pcp: percpu variable
 * @op:  mnemonic of the load instruction ("llgc", "llgh", "llgf", "lg" for
 *       the users below)
 *
 * The value is loaded into the unsigned long res__ and then cast to the
 * type of pcp, which is the value of the expression. The percpu variable
 * is only an input ("m" (*ptr__)) of the inline assembly.
 */
#define arch_this_cpu_read(pcp, op)					\
({									\
	unsigned long lc_pcpr, lc_pcpo, res__;				\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ *ptr__;						\
									\
	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
	ptr__ = PERCPU_PTR(&(pcp));					\
	asm_inline volatile(						\
		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") \
		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
		op "	%[res__],0(%[ptr__])\n"				\
		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
		: [res__] "=&d" (res__), [ptr__] "+&a" (ptr__),		\
		  "=m" (((struct lowcore *)0)->percpu_register)		\
		: [disppcpr] "i" (lc_pcpr),				\
		  [disppcpo] "i" (lc_pcpo),				\
		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
		  "m" (*ptr__),						\
		  "m" (((struct lowcore *)0)->percpu_offset)		\
		: "cc");						\
	(pcp_op_T__)res__;						\
})

#define this_cpu_read_1(pcp) arch_this_cpu_read(pcp, "llgc")
#define this_cpu_read_2(pcp) arch_this_cpu_read(pcp, "llgh")
#define this_cpu_read_4(pcp) arch_this_cpu_read(pcp, "llgf")
#define this_cpu_read_8(pcp) arch_this_cpu_read(pcp, "lg")

/*
 * arch_this_cpu_write - write a percpu variable within a percpu code section
 * @pcp: percpu variable
 * @val: value to store; evaluated once and converted to the type of pcp
 * @op:  mnemonic of the store instruction ("stc", "sth", "st", "stg" for
 *       the users below)
 *
 * The percpu variable is only an output ("=m" (*ptr__)) of the inline
 * assembly; its previous content is not read.
 */
#define arch_this_cpu_write(pcp, val, op)				\
do {									\
	unsigned long lc_pcpr, lc_pcpo;					\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ *ptr__, val__ = (val);				\
									\
	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
	ptr__ = PERCPU_PTR(&(pcp));					\
	asm_inline volatile(						\
		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") \
		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
		op "	%[val__],0(%[ptr__])\n"				\
		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
		: [ptr__] "+&a" (ptr__), "=m" (*ptr__),			\
		  "=m" (((struct lowcore *)0)->percpu_register)		\
		: [val__] "d" (val__),					\
		  [disppcpr] "i" (lc_pcpr),				\
		  [disppcpo] "i" (lc_pcpo),				\
		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
		  "m" (((struct lowcore *)0)->percpu_offset)		\
		: "cc");						\
} while (0)

#define this_cpu_write_1(pcp, val) arch_this_cpu_write(pcp, val, "stc")
#define this_cpu_write_2(pcp, val) arch_this_cpu_write(pcp, val, "sth")
#define this_cpu_write_4(pcp, val) arch_this_cpu_write(pcp, val, "st")
#define this_cpu_write_8(pcp, val) arch_this_cpu_write(pcp, val, "stg")

/*
 * arch_this_cpu_cmpxchg - compare and exchange a percpu variable
 * @pcp:  percpu variable
 * @oval: expected old value
 * @nval: new value
 *
 * Preemption is disabled while the address of the variable is resolved with
 * raw_cpu_ptr() and cmpxchg() is executed. Evaluates to the return value of
 * cmpxchg().
 */
#define arch_this_cpu_cmpxchg(pcp, oval, nval)				\
({									\
	typedef typeof(pcp) pcp_op_T__;					\
	pcp_op_T__ ret__;						\
	pcp_op_T__ *ptr__;						\
	preempt_disable_notrace();					\
	ptr__ = raw_cpu_ptr(&(pcp));					\
	ret__ = cmpxchg(ptr__, oval, nval);				\
	preempt_enable_notrace();					\
	ret__;								\
})

#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)

#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)

/*
 * this_cpu_cmpxchg128 - 128 bit compare and exchange of a percpu variable
 * @pcp:  percpu variable
 * @oval: expected old value
 * @nval: new value
 *
 * oval and nval are copied to u128 variables before preemption is disabled.
 * The pointer is passed as void pointer to cmpxchg128(), so the type of pcp
 * is not checked against u128 here. Evaluates to the u128 return value of
 * cmpxchg128().
 */
#define this_cpu_cmpxchg128(pcp, oval, nval)				\
({									\
	typedef typeof(pcp) pcp_op_T__;					\
	u128 old__, new__, ret__;					\
	pcp_op_T__ *ptr__;						\
	old__ = oval;							\
	new__ = nval;							\
	preempt_disable_notrace();					\
	ptr__ = raw_cpu_ptr(&(pcp));					\
	ret__ = cmpxchg128((void *)ptr__, old__, new__);		\
	preempt_enable_notrace();					\
	ret__;								\
})

/*
 * arch_this_cpu_xchg - exchange the content of a percpu variable
 * @pcp:  percpu variable
 * @nval: new value
 *
 * Preemption is disabled while the address of the variable is resolved with
 * raw_cpu_ptr() and xchg() is executed. Evaluates to the return value of
 * xchg().
 */
#define arch_this_cpu_xchg(pcp, nval)					\
({									\
	typeof(pcp) *ptr__;						\
	typeof(pcp) ret__;						\
	preempt_disable_notrace();					\
	ptr__ = raw_cpu_ptr(&(pcp));					\
	ret__ = xchg(ptr__, nval);					\
	preempt_enable_notrace();					\
	ret__;								\
})

#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)

#include <asm-generic/percpu.h>

#endif /* __ARCH_S390_PERCPU__ */