xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SIModeRegisterDefaults.h"
10 #include "GCNSubtarget.h"
11 
12 using namespace llvm;
13 
SIModeRegisterDefaults(const Function & F,const GCNSubtarget & ST)14 SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
15                                                const GCNSubtarget &ST) {
16   *this = getDefaultForCallingConv(F.getCallingConv());
17 
18   if (ST.hasIEEEMode()) {
19     StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
20     if (!IEEEAttr.empty())
21       IEEE = IEEEAttr == "true";
22   }
23 
24   if (ST.hasDX10ClampMode()) {
25     StringRef DX10ClampAttr =
26         F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
27     if (!DX10ClampAttr.empty())
28       DX10Clamp = DX10ClampAttr == "true";
29   }
30 
31   StringRef DenormF32Attr =
32       F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
33   if (!DenormF32Attr.empty())
34     FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);
35 
36   StringRef DenormAttr =
37       F.getFnAttribute("denormal-fp-math").getValueAsString();
38   if (!DenormAttr.empty()) {
39     DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
40     if (DenormF32Attr.empty())
41       FP32Denormals = DenormMode;
42     FP64FP16Denormals = DenormMode;
43   }
44 }
45 
46 using namespace AMDGPU;
47 
48 /// Combine f32 and f64 rounding modes into a combined rounding mode value.
getModeRegisterRoundMode(uint32_t HWFP32Val,uint32_t HWFP64Val)49 static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val,
50                                                    uint32_t HWFP64Val) {
51   return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset;
52 }
53 
encodeFltRoundsTable(uint32_t FltRoundsVal,uint32_t HWF32Val,uint32_t HWF64Val)54 static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal,
55                                                uint32_t HWF32Val,
56                                                uint32_t HWF64Val) {
57   uint32_t ModeVal = getModeRegisterRoundMode(HWF32Val, HWF64Val);
58   if (FltRoundsVal > TowardNegative)
59     FltRoundsVal -= ExtendedFltRoundOffset;
60 
61   uint32_t BitIndex = ModeVal << 2;
62   return static_cast<uint64_t>(FltRoundsVal) << BitIndex;
63 }
64 
65 // Encode FLT_ROUNDS value where the two rounding modes are the same and use a
66 // standard value
67 static constexpr uint64_t
encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode,uint32_t HWVal)68 encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) {
69   return encodeFltRoundsTable(FltRoundsMode, HWVal, HWVal);
70 }
71 
72 // Convert mode register encoded rounding mode to AMDGPUFltRounds
73 static constexpr AMDGPUFltRounds
decodeIndexFltRoundConversionTable(uint32_t HWMode)74 decodeIndexFltRoundConversionTable(uint32_t HWMode) {
75   uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf;
76   if (TableRead > TowardNegative)
77     TableRead += ExtendedFltRoundOffset;
78   return static_cast<AMDGPUFltRounds>(TableRead);
79 }
80 
81 static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO;
82 static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST;
83 static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF;
84 static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF;
85 
86 const uint64_t AMDGPU::FltRoundConversionTable =
87     encodeFltRoundsTableSame(TowardZeroF32_TowardZeroF64, HWTowardZero) |
88     encodeFltRoundsTableSame(NearestTiesToEvenF32_NearestTiesToEvenF64,
89                              HWNearestTiesToEven) |
90     encodeFltRoundsTableSame(TowardPositiveF32_TowardPositiveF64,
91                              HWTowardPositive) |
92     encodeFltRoundsTableSame(TowardNegativeF32_TowardNegativeF64,
93                              HWTowardNegative) |
94 
95     encodeFltRoundsTable(TowardZeroF32_NearestTiesToEvenF64, HWTowardZero,
96                          HWNearestTiesToEven) |
97     encodeFltRoundsTable(TowardZeroF32_TowardPositiveF64, HWTowardZero,
98                          HWTowardPositive) |
99     encodeFltRoundsTable(TowardZeroF32_TowardNegativeF64, HWTowardZero,
100                          HWTowardNegative) |
101 
102     encodeFltRoundsTable(NearestTiesToEvenF32_TowardZeroF64,
103                          HWNearestTiesToEven, HWTowardZero) |
104     encodeFltRoundsTable(NearestTiesToEvenF32_TowardPositiveF64,
105                          HWNearestTiesToEven, HWTowardPositive) |
106     encodeFltRoundsTable(NearestTiesToEvenF32_TowardNegativeF64,
107                          HWNearestTiesToEven, HWTowardNegative) |
108 
109     encodeFltRoundsTable(TowardPositiveF32_TowardZeroF64, HWTowardPositive,
110                          HWTowardZero) |
111     encodeFltRoundsTable(TowardPositiveF32_NearestTiesToEvenF64,
112                          HWTowardPositive, HWNearestTiesToEven) |
113     encodeFltRoundsTable(TowardPositiveF32_TowardNegativeF64, HWTowardPositive,
114                          HWTowardNegative) |
115 
116     encodeFltRoundsTable(TowardNegativeF32_TowardZeroF64, HWTowardNegative,
117                          HWTowardZero) |
118     encodeFltRoundsTable(TowardNegativeF32_NearestTiesToEvenF64,
119                          HWTowardNegative, HWNearestTiesToEven) |
120     encodeFltRoundsTable(TowardNegativeF32_TowardPositiveF64, HWTowardNegative,
121                          HWTowardPositive);
122 
123 // Verify evaluation of FltRoundConversionTable
124 
125 // If both modes are the same, should return the standard values.
126 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
127                   HWTowardZero, HWTowardZero)) == AMDGPUFltRounds::TowardZero);
128 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
129                   HWNearestTiesToEven, HWNearestTiesToEven)) ==
130               AMDGPUFltRounds::NearestTiesToEven);
131 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
132                   HWTowardPositive, HWTowardPositive)) ==
133               AMDGPUFltRounds::TowardPositive);
134 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
135                   HWTowardNegative, HWTowardNegative)) ==
136               AMDGPUFltRounds::TowardNegative);
137 
138 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
139                   HWTowardZero, HWNearestTiesToEven)) ==
140               TowardZeroF32_NearestTiesToEvenF64);
141 static_assert(decodeIndexFltRoundConversionTable(
142                   getModeRegisterRoundMode(HWTowardZero, HWTowardPositive)) ==
143               TowardZeroF32_TowardPositiveF64);
144 static_assert(decodeIndexFltRoundConversionTable(
145                   getModeRegisterRoundMode(HWTowardZero, HWTowardNegative)) ==
146               TowardZeroF32_TowardNegativeF64);
147 
148 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
149                   HWNearestTiesToEven, HWTowardZero)) ==
150               NearestTiesToEvenF32_TowardZeroF64);
151 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
152                   HWNearestTiesToEven, HWTowardPositive)) ==
153               NearestTiesToEvenF32_TowardPositiveF64);
154 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
155                   HWNearestTiesToEven, HWTowardNegative)) ==
156               NearestTiesToEvenF32_TowardNegativeF64);
157 
158 static_assert(decodeIndexFltRoundConversionTable(
159                   getModeRegisterRoundMode(HWTowardPositive, HWTowardZero)) ==
160               TowardPositiveF32_TowardZeroF64);
161 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
162                   HWTowardPositive, HWNearestTiesToEven)) ==
163               TowardPositiveF32_NearestTiesToEvenF64);
164 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
165                   HWTowardPositive, HWTowardNegative)) ==
166               TowardPositiveF32_TowardNegativeF64);
167 
168 static_assert(decodeIndexFltRoundConversionTable(
169                   getModeRegisterRoundMode(HWTowardNegative, HWTowardZero)) ==
170               TowardNegativeF32_TowardZeroF64);
171 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
172                   HWTowardNegative, HWNearestTiesToEven)) ==
173               TowardNegativeF32_NearestTiesToEvenF64);
174 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
175                   HWTowardNegative, HWTowardPositive)) ==
176               TowardNegativeF32_TowardPositiveF64);
177 
178 // Decode FLT_ROUNDS into the hardware value where the two rounding modes are
179 // the same and use a standard value
encodeFltRoundsToHWTableSame(uint32_t HWVal,uint32_t FltRoundsVal)180 static constexpr uint64_t encodeFltRoundsToHWTableSame(uint32_t HWVal,
181                                                        uint32_t FltRoundsVal) {
182   if (FltRoundsVal > TowardNegative)
183     FltRoundsVal -= ExtendedFltRoundOffset;
184 
185   return static_cast<uint64_t>(getModeRegisterRoundMode(HWVal, HWVal))
186          << (FltRoundsVal << 2);
187 }
188 
189 /// Decode FLT_ROUNDS into the hardware value where the two rounding modes
190 /// different and use an extended value.
encodeFltRoundsToHWTable(uint32_t HWF32Val,uint32_t HWF64Val,uint32_t FltRoundsVal)191 static constexpr uint64_t encodeFltRoundsToHWTable(uint32_t HWF32Val,
192                                                    uint32_t HWF64Val,
193                                                    uint32_t FltRoundsVal) {
194   if (FltRoundsVal > TowardNegative)
195     FltRoundsVal -= ExtendedFltRoundOffset;
196   return static_cast<uint64_t>(getModeRegisterRoundMode(HWF32Val, HWF64Val))
197          << (FltRoundsVal << 2);
198 }
199 
200 const uint64_t AMDGPU::FltRoundToHWConversionTable =
201     encodeFltRoundsToHWTableSame(HWTowardZero, TowardZeroF32_TowardZeroF64) |
202     encodeFltRoundsToHWTableSame(HWNearestTiesToEven,
203                                  NearestTiesToEvenF32_NearestTiesToEvenF64) |
204     encodeFltRoundsToHWTableSame(HWTowardPositive,
205                                  TowardPositiveF32_TowardPositiveF64) |
206     encodeFltRoundsToHWTableSame(HWTowardNegative,
207                                  TowardNegativeF32_TowardNegativeF64) |
208 
209     encodeFltRoundsToHWTable(HWTowardZero, HWNearestTiesToEven,
210                              TowardZeroF32_NearestTiesToEvenF64) |
211     encodeFltRoundsToHWTable(HWTowardZero, HWTowardPositive,
212                              TowardZeroF32_TowardPositiveF64) |
213     encodeFltRoundsToHWTable(HWTowardZero, HWTowardNegative,
214                              TowardZeroF32_TowardNegativeF64) |
215 
216     encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardZero,
217                              NearestTiesToEvenF32_TowardZeroF64) |
218     encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardPositive,
219                              NearestTiesToEvenF32_TowardPositiveF64) |
220     encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardNegative,
221                              NearestTiesToEvenF32_TowardNegativeF64) |
222 
223     encodeFltRoundsToHWTable(HWTowardPositive, HWTowardZero,
224                              TowardPositiveF32_TowardZeroF64) |
225     encodeFltRoundsToHWTable(HWTowardPositive, HWNearestTiesToEven,
226                              TowardPositiveF32_NearestTiesToEvenF64) |
227     encodeFltRoundsToHWTable(HWTowardPositive, HWTowardNegative,
228                              TowardPositiveF32_TowardNegativeF64) |
229 
230     encodeFltRoundsToHWTable(HWTowardNegative, HWTowardZero,
231                              TowardNegativeF32_TowardZeroF64) |
232     encodeFltRoundsToHWTable(HWTowardNegative, HWNearestTiesToEven,
233                              TowardNegativeF32_NearestTiesToEvenF64) |
234     encodeFltRoundsToHWTable(HWTowardNegative, HWTowardPositive,
235                              TowardNegativeF32_TowardPositiveF64);
236 
237 /// Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
238 static constexpr uint32_t
decodeFltRoundToHWConversionTable(uint64_t FltRoundToHWConversionTable,uint32_t FltRounds)239 decodeFltRoundToHWConversionTable(uint64_t FltRoundToHWConversionTable,
240                                   uint32_t FltRounds) {
241   uint32_t IndexVal = FltRounds;
242   if (IndexVal > TowardNegative)
243     IndexVal -= ExtendedFltRoundOffset;
244   return (FltRoundToHWConversionTable >> (IndexVal << 2)) & 0xf;
245 }
246 
decodeFltRoundToHWConversionTable(uint32_t FltRounds)247 uint32_t AMDGPU::decodeFltRoundToHWConversionTable(uint32_t FltRounds) {
248   return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
249                                              FltRounds);
250 }
251 
decodeFltRoundToHW(uint32_t FltRounds)252 static constexpr uint32_t decodeFltRoundToHW(uint32_t FltRounds) {
253   return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
254                                              FltRounds);
255 }
256 
257 // Verify evaluation of FltRoundToHWConversionTable
258 
259 static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardZero) ==
260               getModeRegisterRoundMode(HWTowardZero, HWTowardZero));
261 static_assert(decodeFltRoundToHW(AMDGPUFltRounds::NearestTiesToEven) ==
262               getModeRegisterRoundMode(HWNearestTiesToEven,
263                                        HWNearestTiesToEven));
264 static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardPositive) ==
265               getModeRegisterRoundMode(HWTowardPositive, HWTowardPositive));
266 static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardNegative) ==
267               getModeRegisterRoundMode(HWTowardNegative, HWTowardNegative));
268 
269 static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardPositiveF64) ==
270               getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardPositive));
271 static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardNegativeF64) ==
272               getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardNegative));
273 static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardZeroF64) ==
274               getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardZero));
275 
276 static_assert(decodeFltRoundToHW(TowardPositiveF32_NearestTiesToEvenF64) ==
277               getModeRegisterRoundMode(HWTowardPositive, HWNearestTiesToEven));
278 static_assert(decodeFltRoundToHW(TowardPositiveF32_TowardNegativeF64) ==
279               getModeRegisterRoundMode(HWTowardPositive, HWTowardNegative));
280 static_assert(decodeFltRoundToHW(TowardPositiveF32_TowardZeroF64) ==
281               getModeRegisterRoundMode(HWTowardPositive, HWTowardZero));
282 
283 static_assert(decodeFltRoundToHW(TowardNegativeF32_NearestTiesToEvenF64) ==
284               getModeRegisterRoundMode(HWTowardNegative, HWNearestTiesToEven));
285 static_assert(decodeFltRoundToHW(TowardNegativeF32_TowardPositiveF64) ==
286               getModeRegisterRoundMode(HWTowardNegative, HWTowardPositive));
287 static_assert(decodeFltRoundToHW(TowardNegativeF32_TowardZeroF64) ==
288               getModeRegisterRoundMode(HWTowardNegative, HWTowardZero));
289 
290 static_assert(decodeFltRoundToHW(TowardZeroF32_NearestTiesToEvenF64) ==
291               getModeRegisterRoundMode(HWTowardZero, HWNearestTiesToEven));
292 static_assert(decodeFltRoundToHW(TowardZeroF32_TowardPositiveF64) ==
293               getModeRegisterRoundMode(HWTowardZero, HWTowardPositive));
294 static_assert(decodeFltRoundToHW(TowardZeroF32_TowardNegativeF64) ==
295               getModeRegisterRoundMode(HWTowardZero, HWTowardNegative));
296