avxvnniint8intrin.h - OpenGrok cross reference for /freebsd/contrib/llvm-project/clang/lib/Headers/avxvnniint8intrin.h

Lines Matching +full:4 +full:- +full:bit
1 /*===-------- avxvnniint8intrin.h - AVXVNNIINT8 intrinsics -----------===
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7  *===-----------------------------------------------------------------------===
25 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
26 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
27 ///    signed 16-bit results. Sum these 4 results with the corresponding
28 ///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
39 ///    A 128-bit vector of [16 x char].
41 ///    A 128-bit vector of [16 x char].
43 ///    A 128-bit vector of [4 x int].
47 /// 	tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])
48 /// 	tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])
49 /// 	tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])
50 /// 	tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])
62 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
63 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
64 ///    signed 16-bit results. Sum these 4 results with the corresponding
65 ///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
76 ///    A 256-bit vector of [32 x char].
78 ///    A 256-bit vector of [32 x char].
80 ///    A 256-bit vector of [8 x int].
84 /// 	tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])
85 /// 	tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])
86 /// 	tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])
87 /// 	tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])
98 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
99 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
100 ///    signed 16-bit results. Sum these 4 results with the corresponding
101 ///    32-bit integer in \a __W with signed saturation, and store the packed
102 ///    32-bit results in \a dst.
113 ///    A 128-bit vector of [16 x char].
115 ///    A 128-bit vector of [16 x char].
117 ///    A 128-bit vector of [4 x int].
121 /// 	tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])
122 /// 	tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])
123 /// 	tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])
124 /// 	tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])
136 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
137 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
138 ///    signed 16-bit results. Sum these 4 results with the corresponding
139 ///    32-bit integer in \a __W with signed saturation, and store the packed
140 ///    32-bit results in \a dst.
151 ///    A 256-bit vector of [32 x char].
153 ///    A 256-bit vector of [32 x char].
155 ///    A 256-bit vector of [8 x int].
159 /// 	tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])
160 /// 	tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])
161 /// 	tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])
162 /// 	tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])
173 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
174 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
175 ///    signed 16-bit results. Sum these 4 results with the corresponding
176 ///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
187 ///    A 128-bit vector of [16 x char].
189 ///    A 128-bit vector of [16 x unsigned char].
191 ///    A 128-bit vector of [4 x int].
195 /// 	tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]))
196 /// 	tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]))
197 /// 	tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]))
198 /// 	tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]))
210 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
211 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
212 ///    signed 16-bit results. Sum these 4 results with the corresponding
213 ///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
224 ///    A 256-bit vector of [32 x char].
226 ///    A 256-bit vector of [32 x unsigned char].
228 ///    A 256-bit vector of [8 x int].
232 /// 	tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]))
233 /// 	tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]))
234 /// 	tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]))
235 /// 	tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]))
246 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
247 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
248 ///    signed 16-bit results. Sum these 4 results with the corresponding
249 ///    32-bit integer in \a __W with signed saturation, and store the packed
250 ///    32-bit results in \a dst.
261 ///    A 128-bit vector of [16 x char].
263 ///    A 128-bit vector of [16 x unsigned char].
265 ///    A 128-bit vector of [4 x int].
269 /// 	tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]))
270 /// 	tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]))
271 /// 	tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]))
272 /// 	tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]))
284 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
285 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
286 ///    signed 16-bit results. Sum these 4 results with the corresponding
287 ///    32-bit integer in \a __W with signed saturation, and store the packed
288 ///    32-bit results in \a dst.
299 ///    A 256-bit vector of [32 x char].
301 ///    A 256-bit vector of [32 x unsigned char].
303 ///    A 256-bit vector of [8 x int].
307 /// 	tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]))
308 /// 	tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]))
309 /// 	tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]))
310 /// 	tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]))
321 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
322 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
323 ///    unsigned 16-bit results. Sum these 4 results with the corresponding
324 ///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
335 ///    A 128-bit vector of [16 x unsigned char].
337 ///    A 128-bit vector of [16 x unsigned char].
339 ///    A 128-bit vector of [4 x int].
343 /// 	tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])
344 /// 	tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])
345 /// 	tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])
346 /// 	tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])
358 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
359 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
360 ///    unsigned 16-bit results. Sum these 4 results with the corresponding
361 ///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
372 ///    A 256-bit vector of [32 x unsigned char].
374 ///    A 256-bit vector of [32 x unsigned char].
376 ///    A 256-bit vector of [8 x int].
380 /// 	tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])
381 /// 	tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])
382 /// 	tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])
383 /// 	tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])
394 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
395 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
396 ///    unsigned 16-bit results. Sum these 4 results with the corresponding
397 ///    32-bit integer in \a __W with unsigned saturation, and store the packed
398 ///    32-bit results in \a dst.
409 ///    A 128-bit vector of [16 x unsigned char].
411 ///    A 128-bit vector of [16 x unsigned char].
413 ///    A 128-bit vector of [4 x int].
417 /// 	tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])
418 /// 	tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])
419 /// 	tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])
420 /// 	tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])
432 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
433 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
434 ///    unsigned 16-bit results. Sum these 4 results with the corresponding
435 ///    32-bit integer in \a __W with unsigned saturation, and store the packed
436 ///    32-bit results in \a dst.
447 ///    A 256-bit vector of [32 x unsigned char].
449 ///    A 256-bit vector of [32 x unsigned char].
451 ///    A 256-bit vector of [8 x int].
455 /// 	tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])
456 /// 	tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])
457 /// 	tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])
458 /// 	tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])