// core/stdarch/crates/core_arch/src/x86/avx512f.rs
1use crate::{
2    arch::asm,
3    core_arch::{simd::*, x86::*},
4    intrinsics::simd::*,
5    intrinsics::{fmaf32, fmaf64},
6    mem, ptr,
7};
8
9use core::hint::unreachable_unchecked;
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    // SAFETY: lane-wise SIMD intrinsics on an i32x16 view of the input;
    // `transmute` converts between equally sized vector types.
    unsafe {
        let a = a.as_i32x16();
        // Branchless per-lane abs: select `-a` where `a < 0`, else `a`.
        // Negation is wrapping, so i32::MIN maps to itself (VPABSD behaves the same).
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
28
/// Computes the absolute value of packed 32-bit integers in `a`, and store the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    // SAFETY: 16-bit mask matches the 16-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else src[i].
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}
45
/// Computes the absolute value of packed 32-bit integers in `a`, and store the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    // SAFETY: 16-bit mask matches the 16-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}
62
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else src[i].
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}
77
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}
92
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else src[i].
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}
107
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}
122
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    // SAFETY: lane-wise SIMD intrinsics on an i64x8 view of the input;
    // `transmute` converts between equally sized vector types.
    unsafe {
        let a = a.as_i64x8();
        // Branchless per-lane abs: select `-a` where `a < 0`, else `a`.
        // Negation is wrapping, so i64::MIN maps to itself (VPABSQ behaves the same).
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
138
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else src[i].
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}
153
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}
168
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    // SAFETY: lane-wise SIMD intrinsics on an i64x4 view of the input;
    // `transmute` converts between equally sized vector types.
    unsafe {
        let a = a.as_i64x4();
        // Branchless per-lane abs: select `-a` where `a < 0`, else `a`.
        // Negation is wrapping, so i64::MIN maps to itself (VPABSQ behaves the same).
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
184
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else src[i].
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}
199
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}
214
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi64(a: __m128i) -> __m128i {
    // SAFETY: lane-wise SIMD intrinsics on an i64x2 view of the input;
    // `transmute` converts between equally sized vector types.
    unsafe {
        let a = a.as_i64x2();
        // Branchless per-lane abs: select `-a` where `a < 0`, else `a`.
        // Negation is wrapping, so i64::MIN maps to itself (VPABSQ behaves the same).
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
230
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 2 bits of the 8-bit mask select lanes of the
    // 2-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else src[i].
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}
245
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 2 bits of the 8-bit mask select lanes of the
    // 2-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        // Per-lane blend: lane i is |a[i]| when bit i of `k` is set, else 0.
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}
260
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_ps(v2: __m512) -> __m512 {
    // SAFETY: `simd_fabs` is a pure lane-wise absolute value (clears each
    // lane's sign bit). Compiles to a sign-mask AND, hence the vpandd check.
    unsafe { simd_fabs(v2) }
}
272
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    // SAFETY: 16-bit mask matches the 16-lane vectors. Lane i is |v2[i]| when
    // bit i of `k` is set, else src[i].
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}
284
/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    // SAFETY: `simd_fabs` is a pure lane-wise absolute value (clears each
    // lane's sign bit). Compiles to a sign-mask AND, hence the vpandq check.
    unsafe { simd_fabs(v2) }
}
296
/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    // SAFETY: 8-bit mask matches the 8-lane vectors. Lane i is |v2[i]| when
    // bit i of `k` is set, else src[i].
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}
308
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    // SAFETY: 16-bit mask matches the 16-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_i32x16();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}
323
/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    // SAFETY: 16-bit mask matches the 16-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_i32x16();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}
338
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_i32x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}
353
/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_i32x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}
368
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_i32x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}
383
/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_i32x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}
398
/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_i64x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}
413
/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_i64x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}
428
/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_i64x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}
443
/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_i64x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}
458
/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 2 bits of the 8-bit mask select lanes of the
    // 2-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_i64x2();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}
473
/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only the low 2 bits of the 8-bit mask select lanes of the
    // 2-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_i64x2();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}
488
/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    // SAFETY: 16-bit mask matches the 16-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_f32x16();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}
503
/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    // SAFETY: 16-bit mask matches the 16-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_f32x16();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.0.
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}
518
/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_f32x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}
533
/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_f32x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.0.
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}
548
/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_f32x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}
563
/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_f32x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.0.
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}
578
/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_f64x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}
593
/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    // SAFETY: 8-bit mask matches the 8-lane vectors; `transmute` converts
    // between equally sized vector types.
    unsafe {
        let mov = a.as_f64x8();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.0.
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}
608
/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_f64x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else from `src`.
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}
623
/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    // SAFETY: only the low 4 bits of the 8-bit mask select lanes of the
    // 4-lane vectors; `transmute` converts between equally sized vector types.
    unsafe {
        let mov = a.as_f64x4();
        // Per-lane blend: lane i comes from `a` when bit i of `k` is set, else is 0.0.
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}
638
639/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
640///
641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
642#[inline]
643#[target_feature(enable = "avx512f,avx512vl")]
644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
645#[cfg_attr(test, assert_instr(vmovapd))]
646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
647pub const fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
648    unsafe {
649        let mov = a.as_f64x2();
650        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
651    }
652}
653
654/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
655///
656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
657#[inline]
658#[target_feature(enable = "avx512f,avx512vl")]
659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
660#[cfg_attr(test, assert_instr(vmovapd))]
661#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
662pub const fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
663    unsafe {
664        let mov = a.as_f64x2();
665        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
666    }
667}
668
669/// Add packed 32-bit integers in a and b, and store the results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
672#[inline]
673#[target_feature(enable = "avx512f")]
674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
675#[cfg_attr(test, assert_instr(vpaddd))]
676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
677pub const fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
678    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
679}
680
681/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
682///
683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
684#[inline]
685#[target_feature(enable = "avx512f")]
686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
687#[cfg_attr(test, assert_instr(vpaddd))]
688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
689pub const fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
690    unsafe {
691        let add = _mm512_add_epi32(a, b).as_i32x16();
692        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
693    }
694}
695
696/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
697///
698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
699#[inline]
700#[target_feature(enable = "avx512f")]
701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
702#[cfg_attr(test, assert_instr(vpaddd))]
703#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
704pub const fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
705    unsafe {
706        let add = _mm512_add_epi32(a, b).as_i32x16();
707        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
708    }
709}
710
711/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
712///
713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
714#[inline]
715#[target_feature(enable = "avx512f,avx512vl")]
716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
717#[cfg_attr(test, assert_instr(vpaddd))]
718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
719pub const fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
720    unsafe {
721        let add = _mm256_add_epi32(a, b).as_i32x8();
722        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
723    }
724}
725
726/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
727///
728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
729#[inline]
730#[target_feature(enable = "avx512f,avx512vl")]
731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
732#[cfg_attr(test, assert_instr(vpaddd))]
733#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
734pub const fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
735    unsafe {
736        let add = _mm256_add_epi32(a, b).as_i32x8();
737        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
738    }
739}
740
741/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
742///
743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
744#[inline]
745#[target_feature(enable = "avx512f,avx512vl")]
746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
747#[cfg_attr(test, assert_instr(vpaddd))]
748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
749pub const fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
750    unsafe {
751        let add = _mm_add_epi32(a, b).as_i32x4();
752        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
753    }
754}
755
756/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
757///
758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
759#[inline]
760#[target_feature(enable = "avx512f,avx512vl")]
761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
762#[cfg_attr(test, assert_instr(vpaddd))]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
765    unsafe {
766        let add = _mm_add_epi32(a, b).as_i32x4();
767        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
768    }
769}
770
771/// Add packed 64-bit integers in a and b, and store the results in dst.
772///
773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
774#[inline]
775#[target_feature(enable = "avx512f")]
776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
777#[cfg_attr(test, assert_instr(vpaddq))]
778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
779pub const fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
780    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
781}
782
783/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
784///
785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
786#[inline]
787#[target_feature(enable = "avx512f")]
788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
789#[cfg_attr(test, assert_instr(vpaddq))]
790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
791pub const fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
792    unsafe {
793        let add = _mm512_add_epi64(a, b).as_i64x8();
794        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
795    }
796}
797
798/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
799///
800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
801#[inline]
802#[target_feature(enable = "avx512f")]
803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
804#[cfg_attr(test, assert_instr(vpaddq))]
805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
806pub const fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
807    unsafe {
808        let add = _mm512_add_epi64(a, b).as_i64x8();
809        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
810    }
811}
812
813/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
816#[inline]
817#[target_feature(enable = "avx512f,avx512vl")]
818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
819#[cfg_attr(test, assert_instr(vpaddq))]
820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
821pub const fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
822    unsafe {
823        let add = _mm256_add_epi64(a, b).as_i64x4();
824        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
825    }
826}
827
828/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
829///
830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
831#[inline]
832#[target_feature(enable = "avx512f,avx512vl")]
833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
834#[cfg_attr(test, assert_instr(vpaddq))]
835#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
836pub const fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
837    unsafe {
838        let add = _mm256_add_epi64(a, b).as_i64x4();
839        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
840    }
841}
842
843/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
844///
845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
846#[inline]
847#[target_feature(enable = "avx512f,avx512vl")]
848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
849#[cfg_attr(test, assert_instr(vpaddq))]
850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
851pub const fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
852    unsafe {
853        let add = _mm_add_epi64(a, b).as_i64x2();
854        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
855    }
856}
857
858/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
859///
860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
861#[inline]
862#[target_feature(enable = "avx512f,avx512vl")]
863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
864#[cfg_attr(test, assert_instr(vpaddq))]
865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
866pub const fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
867    unsafe {
868        let add = _mm_add_epi64(a, b).as_i64x2();
869        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
870    }
871}
872
873/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
874///
875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
876#[inline]
877#[target_feature(enable = "avx512f")]
878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
879#[cfg_attr(test, assert_instr(vaddps))]
880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
881pub const fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
882    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
883}
884
885/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
886///
887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
888#[inline]
889#[target_feature(enable = "avx512f")]
890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
891#[cfg_attr(test, assert_instr(vaddps))]
892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
893pub const fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
894    unsafe {
895        let add = _mm512_add_ps(a, b).as_f32x16();
896        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
897    }
898}
899
900/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
901///
902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
903#[inline]
904#[target_feature(enable = "avx512f")]
905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
906#[cfg_attr(test, assert_instr(vaddps))]
907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
908pub const fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
909    unsafe {
910        let add = _mm512_add_ps(a, b).as_f32x16();
911        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
912    }
913}
914
915/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
916///
917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
918#[inline]
919#[target_feature(enable = "avx512f,avx512vl")]
920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
921#[cfg_attr(test, assert_instr(vaddps))]
922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
923pub const fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
924    unsafe {
925        let add = _mm256_add_ps(a, b).as_f32x8();
926        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
927    }
928}
929
930/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
931///
932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
933#[inline]
934#[target_feature(enable = "avx512f,avx512vl")]
935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
936#[cfg_attr(test, assert_instr(vaddps))]
937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
938pub const fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
939    unsafe {
940        let add = _mm256_add_ps(a, b).as_f32x8();
941        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
942    }
943}
944
945/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
946///
947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
948#[inline]
949#[target_feature(enable = "avx512f,avx512vl")]
950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
951#[cfg_attr(test, assert_instr(vaddps))]
952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
953pub const fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
954    unsafe {
955        let add = _mm_add_ps(a, b).as_f32x4();
956        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
957    }
958}
959
960/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
961///
962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
963#[inline]
964#[target_feature(enable = "avx512f,avx512vl")]
965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
966#[cfg_attr(test, assert_instr(vaddps))]
967#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
968pub const fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
969    unsafe {
970        let add = _mm_add_ps(a, b).as_f32x4();
971        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
972    }
973}
974
975/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
976///
977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
978#[inline]
979#[target_feature(enable = "avx512f")]
980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
981#[cfg_attr(test, assert_instr(vaddpd))]
982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
983pub const fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
984    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
985}
986
987/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
988///
989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
990#[inline]
991#[target_feature(enable = "avx512f")]
992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
993#[cfg_attr(test, assert_instr(vaddpd))]
994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
995pub const fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
996    unsafe {
997        let add = _mm512_add_pd(a, b).as_f64x8();
998        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
999    }
1000}
1001
1002/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1003///
1004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
1005#[inline]
1006#[target_feature(enable = "avx512f")]
1007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1008#[cfg_attr(test, assert_instr(vaddpd))]
1009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1010pub const fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1011    unsafe {
1012        let add = _mm512_add_pd(a, b).as_f64x8();
1013        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
1014    }
1015}
1016
1017/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1018///
1019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
1020#[inline]
1021#[target_feature(enable = "avx512f,avx512vl")]
1022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1023#[cfg_attr(test, assert_instr(vaddpd))]
1024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1025pub const fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1026    unsafe {
1027        let add = _mm256_add_pd(a, b).as_f64x4();
1028        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
1029    }
1030}
1031
1032/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1033///
1034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
1035#[inline]
1036#[target_feature(enable = "avx512f,avx512vl")]
1037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1038#[cfg_attr(test, assert_instr(vaddpd))]
1039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1040pub const fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1041    unsafe {
1042        let add = _mm256_add_pd(a, b).as_f64x4();
1043        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
1044    }
1045}
1046
1047/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1048///
1049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
1050#[inline]
1051#[target_feature(enable = "avx512f,avx512vl")]
1052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1053#[cfg_attr(test, assert_instr(vaddpd))]
1054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1055pub const fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1056    unsafe {
1057        let add = _mm_add_pd(a, b).as_f64x2();
1058        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
1059    }
1060}
1061
1062/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
1065#[inline]
1066#[target_feature(enable = "avx512f,avx512vl")]
1067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1068#[cfg_attr(test, assert_instr(vaddpd))]
1069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1070pub const fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1071    unsafe {
1072        let add = _mm_add_pd(a, b).as_f64x2();
1073        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
1074    }
1075}
1076
1077/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
1078///
1079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
1080#[inline]
1081#[target_feature(enable = "avx512f")]
1082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1083#[cfg_attr(test, assert_instr(vpsubd))]
1084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1085pub const fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1086    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
1087}
1088
1089/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1090///
1091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1092#[inline]
1093#[target_feature(enable = "avx512f")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[cfg_attr(test, assert_instr(vpsubd))]
1096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1097pub const fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1098    unsafe {
1099        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1100        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
1101    }
1102}
1103
1104/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1105///
1106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1107#[inline]
1108#[target_feature(enable = "avx512f")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[cfg_attr(test, assert_instr(vpsubd))]
1111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1112pub const fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1113    unsafe {
1114        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1115        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
1116    }
1117}
1118
1119/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1120///
1121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1122#[inline]
1123#[target_feature(enable = "avx512f,avx512vl")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[cfg_attr(test, assert_instr(vpsubd))]
1126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1127pub const fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1128    unsafe {
1129        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1130        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
1131    }
1132}
1133
1134/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1135///
1136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1137#[inline]
1138#[target_feature(enable = "avx512f,avx512vl")]
1139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1140#[cfg_attr(test, assert_instr(vpsubd))]
1141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1142pub const fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1143    unsafe {
1144        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1145        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
1146    }
1147}
1148
1149/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1150///
1151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1152#[inline]
1153#[target_feature(enable = "avx512f,avx512vl")]
1154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1155#[cfg_attr(test, assert_instr(vpsubd))]
1156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1157pub const fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1158    unsafe {
1159        let sub = _mm_sub_epi32(a, b).as_i32x4();
1160        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
1161    }
1162}
1163
1164/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1165///
1166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1167#[inline]
1168#[target_feature(enable = "avx512f,avx512vl")]
1169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1170#[cfg_attr(test, assert_instr(vpsubd))]
1171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1172pub const fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1173    unsafe {
1174        let sub = _mm_sub_epi32(a, b).as_i32x4();
1175        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
1176    }
1177}
1178
1179/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1180///
1181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1182#[inline]
1183#[target_feature(enable = "avx512f")]
1184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1185#[cfg_attr(test, assert_instr(vpsubq))]
1186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1187pub const fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1188    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
1189}
1190
1191/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1192///
1193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1194#[inline]
1195#[target_feature(enable = "avx512f")]
1196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1197#[cfg_attr(test, assert_instr(vpsubq))]
1198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1199pub const fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1200    unsafe {
1201        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1202        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
1203    }
1204}
1205
1206/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width subtraction, then zeroing select: lanes whose mask bit
        // is clear become 0 instead of being copied from a source vector.
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}
1220
1221/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1222///
1223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit (4-lane) variant; only the low 4 bits of `k` are relevant.
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        // Per lane i: mask bit i set -> sub[i], clear -> src[i].
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}
1235
1236/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1237///
1238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit zeroing variant: lanes with a clear mask bit become 0.
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}
1250
1251/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1252///
1253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit (2-lane) variant; only the low 2 bits of `k` are relevant.
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        // Per lane i: mask bit i set -> sub[i], clear -> src[i].
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}
1265
1266/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1267///
1268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit zeroing variant: lanes with a clear mask bit become 0.
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
    }
}
1280
1281/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    // Lane-wise IEEE f32 subtraction over sixteen lanes; `transmute` only
    // reinterprets the typed vector back to the opaque `__m512`.
    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
}
1292
1293/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Full-width subtraction, then merge: per lane i, mask bit i set ->
        // sub[i], clear -> src[i].
        let sub = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
    }
}
1307
1308/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1309///
1310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Zeroing variant: lanes with a clear mask bit become +0.0.
        let sub = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
    }
}
1322
1323/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1324///
1325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        // 256-bit (8-lane f32) merge-masking variant.
        let sub = _mm256_sub_ps(a, b).as_f32x8();
        // Per lane i: mask bit i set -> sub[i], clear -> src[i].
        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
    }
}
1337
1338/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1339///
1340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        // 256-bit zeroing variant: lanes with a clear mask bit become +0.0.
        let sub = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
    }
}
1352
1353/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1354///
1355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        // 128-bit (4-lane f32) merge-masking variant; only the low 4 bits of
        // `k` are relevant.
        let sub = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
    }
}
1367
1368/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1369///
1370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        // 128-bit zeroing variant: lanes with a clear mask bit become +0.0.
        let sub = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
    }
}
1382
1383/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1384///
1385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    // Lane-wise IEEE f64 subtraction over eight lanes; `transmute` only
    // reinterprets the typed vector back to the opaque `__m512d`.
    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
}
1394
1395/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1396///
1397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        // Full-width subtraction, then merge: per lane i, mask bit i set ->
        // sub[i], clear -> src[i].
        let sub = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
    }
}
1409
1410/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1411///
1412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        // Zeroing variant: lanes with a clear mask bit become +0.0.
        let sub = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
    }
}
1424
1425/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1426///
1427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        // 256-bit (4-lane f64) merge-masking variant; only the low 4 bits of
        // `k` are relevant.
        let sub = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
    }
}
1439
1440/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1441///
1442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        // 256-bit zeroing variant: lanes with a clear mask bit become +0.0.
        let sub = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
    }
}
1454
1455/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1456///
1457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // 128-bit (2-lane f64) merge-masking variant; only the low 2 bits of
        // `k` are relevant.
        let sub = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
    }
}
1469
1470/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // 128-bit zeroing variant: lanes with a clear mask bit become +0.0.
        let sub = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
    }
}
1484
1485/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1486///
1487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Narrow each 64-bit lane to its low 32 bits (i64x8 -> i32x8), then
        // sign-extend back to 64 bits. Multiplying the sign-extended values
        // reproduces vpmuldq's "signed low-32 x low-32 -> 64-bit" behavior.
        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
    }
}
1500
1501/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1502///
1503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width multiply, then merge: per 64-bit lane i, mask bit i set
        // -> mul[i], clear -> src[i].
        let mul = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}
1515
1516/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1517///
1518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Zeroing variant: 64-bit lanes with a clear mask bit become 0.
        let mul = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
    }
}
1530
1531/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit (4 result lanes) merge-masking variant; only the low 4
        // bits of `k` are relevant.
        let mul = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
    }
}
1545
1546/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1547///
1548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit zeroing variant: lanes with a clear mask bit become 0.
        let mul = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
    }
}
1560
1561/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1562///
1563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit (2 result lanes) merge-masking variant; only the low 2
        // bits of `k` are relevant.
        let mul = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
    }
}
1575
1576/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1577///
1578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit zeroing variant: lanes with a clear mask bit become 0.
        let mul = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
    }
}
1590
1591/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise 32-bit multiply keeping only the low 32 bits of each
    // product (i.e. wrapping), matching vpmulld.
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
}
1602
1603/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1604///
1605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        // Full-width low multiply, then merge: per lane i, mask bit i set ->
        // mul[i], clear -> src[i].
        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
    }
}
1622
1623/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1624///
1625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Zeroing variant: lanes with a clear mask bit become 0.
        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
    }
}
1637
1638/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1639///
1640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit (8-lane) merge-masking variant.
        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
        // Per lane i: mask bit i set -> mul[i], clear -> src[i].
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
    }
}
1652
1653/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1654///
1655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit zeroing variant: lanes with a clear mask bit become 0.
        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
    }
}
1667
1668/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit (4-lane) merge-masking variant; only the low 4 bits of `k`
        // are relevant.
        let mul = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
    }
}
1682
1683/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1684///
1685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit zeroing variant: lanes with a clear mask bit become 0.
        let mul = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
    }
}
1697
1698/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1699///
1700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1701///
1702/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise (wrapping) 64-bit multiply keeping the low 64 bits of each
    // product. As the doc comment notes, this emits an instruction sequence
    // rather than a single instruction, hence no `assert_instr` here.
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
}
1710
1711/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1712///
1713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1714///
1715/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullox_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        // Full-width 64-bit low multiply, then merge: per lane i, mask bit i
        // set -> mul[i], clear -> src[i].
        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}
1731
1732/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1733///
1734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        // Masking each 64-bit lane with 0x0000_0000_FFFF_FFFF keeps only the
        // (zero-extended) low 32 bits, so the 64-bit multiply reproduces
        // vpmuludq's "unsigned low-32 x low-32 -> 64-bit" behavior.
        let mask = u64x8::splat(u32::MAX as u64);
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
1748
1749/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1750///
1751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width multiply, then merge: per 64-bit lane i, mask bit i set
        // -> mul[i], clear -> src[i].
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
    }
}
1763
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
    }
}
1778
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
    }
}
1793
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
    }
}
1808
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
    }
}
1823
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
    }
}
1838
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    // SAFETY: `__m512` and `f32x16` are plain-old-data of identical size, and
    // the required target feature is enforced by the attribute above.
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
}
1850
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
    }
}
1865
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
    }
}
1880
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
    }
}
1895
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
    }
}
1910
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
    }
}
1925
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
    }
}
1940
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: `__m512d` and `f64x8` are plain-old-data of identical size, and
    // the required target feature is enforced by the attribute above.
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
}
1952
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
    }
}
1967
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
    }
}
1982
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
    }
}
1997
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
    }
}
2012
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        // Bit i of `k` set: take lane i of `mul`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
    }
}
2027
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        // Bit i of `k` set: take lane i of `mul`; clear: zero the lane.
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
    }
}
2042
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    // SAFETY: `__m512` and `f32x16` are plain-old-data of identical size, and
    // the required target feature is enforced by the attribute above.
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
}
2054
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        // Bit i of `k` set: take lane i of `div`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
    }
}
2069
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        // Bit i of `k` set: take lane i of `div`; clear: zero the lane.
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
    }
}
2084
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        // Bit i of `k` set: take lane i of `div`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
    }
}
2099
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        // Bit i of `k` set: take lane i of `div`; clear: zero the lane.
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
    }
}
2114
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        // Bit i of `k` set: take lane i of `div`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
    }
}
2129
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        // Bit i of `k` set: take lane i of `div`; clear: zero the lane.
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
    }
}
2144
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: `__m512d` and `f64x8` are plain-old-data of identical size, and
    // the required target feature is enforced by the attribute above.
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
}
2156
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        // Bit i of `k` set: take lane i of `div`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
    }
}
2171
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        // Bit i of `k` set: take lane i of `div`; clear: zero the lane.
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
    }
}
2186
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        // Bit i of `k` set: take lane i of `div`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
    }
}
2201
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        // Bit i of `k` set: take lane i of `div`; clear: zero the lane.
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
    }
}
2216
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        // Bit i of `k` set: take lane i of `div`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
    }
}
2231
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        // Bit i of `k` set: take lane i of `div`; clear: zero the lane.
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
    }
}
2246
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: lane-wise signed maximum via the portable `simd_imax`
    // intrinsic; the required target feature is enforced by the attribute.
    unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
}
2258
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        // Bit i of `k` set: take lane i of `max`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
    }
}
2273
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        // Bit i of `k` set: take lane i of `max`; clear: zero the lane.
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
    }
}
2288
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        // Bit i of `k` set: take lane i of `max`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}
2303
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        // Bit i of `k` set: take lane i of `max`; clear: zero the lane.
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}
2318
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: all vector types here are plain-old-data of identical size, and
    // the required target features are enforced by the attributes above.
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        // Bit i of `k` set: take lane i of `max`; clear: keep `src`'s lane.
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}
2333
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
    }
}
2348
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed max on the i64x8 view; lowers to `vpmaxsq` (see assert_instr).
    unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
}
2360
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
    }
}
2375
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
    }
}
2390
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
    // Lane-wise signed max on the i64x4 view; lowers to `vpmaxsq` (see assert_instr).
    unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
}
2402
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
    }
}
2417
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
    }
}
2432
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise signed max on the i64x2 view; lowers to `vpmaxsq` (see assert_instr).
    unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
}
2444
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
    }
}
2459
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
    }
}
2474
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Calls the vmaxps LLVM intrinsic; _MM_FROUND_CUR_DIRECTION requests the
        // current MXCSR behavior (i.e. no SAE override) rather than a fixed mode.
        transmute(vmaxps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
2491
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
    }
}
2505
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
    }
}
2519
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
    }
}
2533
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
    }
}
2547
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
    }
}
2561
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
    }
}
2575
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    // Calls the vmaxpd LLVM intrinsic; _MM_FROUND_CUR_DIRECTION requests the
    // current MXCSR behavior (i.e. no SAE override) rather than a fixed mode.
    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}
2586
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
    }
}
2600
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
    }
}
2614
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
    }
}
2628
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
    }
}
2642
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
    }
}
2656
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
    }
}
2670
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise max on the u32x16 view (unsigned compare via element type);
    // lowers to `vpmaxud` (see assert_instr).
    unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
}
2682
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
    }
}
2697
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
    }
}
2712
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
    }
}
2727
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
    }
}
2742
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
    }
}
2757
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
    }
}
2772
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise max on the u64x8 view (unsigned compare via element type);
    // lowers to `vpmaxuq` (see assert_instr).
    unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
}
2784
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
    }
}
2799
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
    }
}
2814
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
    // Lane-wise max on the u64x4 view (unsigned compare via element type);
    // lowers to `vpmaxuq` (see assert_instr).
    unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
}
2826
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
    }
}
2841
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
    }
}
2856
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise max on the u64x2 view (unsigned compare via element type);
    // lowers to `vpmaxuq` (see assert_instr).
    unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
}
2868
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        // Bit i of `k` selects max[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
    }
}
2883
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        // Bit i of `k` selects max[i]; a clear bit zeroes that lane.
        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
    }
}
2898
/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed min on the i32x16 view; lowers to `vpminsd` (see assert_instr).
    unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
}
2910
/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        // Bit i of `k` selects min[i]; a clear bit keeps the lane from `src`.
        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
    }
}
2925
2926/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2927///
2928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2929#[inline]
2930#[target_feature(enable = "avx512f")]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932#[cfg_attr(test, assert_instr(vpminsd))]
2933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2934pub const fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2935    unsafe {
2936        let min = _mm512_min_epi32(a, b).as_i32x16();
2937        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2938    }
2939}
2940
2941/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2942///
2943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2944#[inline]
2945#[target_feature(enable = "avx512f,avx512vl")]
2946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2947#[cfg_attr(test, assert_instr(vpminsd))]
2948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2949pub const fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2950    unsafe {
2951        let min = _mm256_min_epi32(a, b).as_i32x8();
2952        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2953    }
2954}
2955
2956/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2957///
2958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2959#[inline]
2960#[target_feature(enable = "avx512f,avx512vl")]
2961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2962#[cfg_attr(test, assert_instr(vpminsd))]
2963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2964pub const fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2965    unsafe {
2966        let min = _mm256_min_epi32(a, b).as_i32x8();
2967        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2968    }
2969}
2970
2971/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2972///
2973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2974#[inline]
2975#[target_feature(enable = "avx512f,avx512vl")]
2976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2977#[cfg_attr(test, assert_instr(vpminsd))]
2978#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2979pub const fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2980    unsafe {
2981        let min = _mm_min_epi32(a, b).as_i32x4();
2982        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2983    }
2984}
2985
2986/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2987///
2988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2989#[inline]
2990#[target_feature(enable = "avx512f,avx512vl")]
2991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2992#[cfg_attr(test, assert_instr(vpminsd))]
2993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2994pub const fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2995    unsafe {
2996        let min = _mm_min_epi32(a, b).as_i32x4();
2997        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2998    }
2999}
3000
3001/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3002///
3003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
3004#[inline]
3005#[target_feature(enable = "avx512f")]
3006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3007#[cfg_attr(test, assert_instr(vpminsq))]
3008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3009pub const fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
3010    unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
3011}
3012
3013/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3014///
3015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
3016#[inline]
3017#[target_feature(enable = "avx512f")]
3018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3019#[cfg_attr(test, assert_instr(vpminsq))]
3020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3021pub const fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3022    unsafe {
3023        let min = _mm512_min_epi64(a, b).as_i64x8();
3024        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
3025    }
3026}
3027
3028/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3029///
3030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
3031#[inline]
3032#[target_feature(enable = "avx512f")]
3033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3034#[cfg_attr(test, assert_instr(vpminsq))]
3035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3036pub const fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3037    unsafe {
3038        let min = _mm512_min_epi64(a, b).as_i64x8();
3039        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
3040    }
3041}
3042
3043/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3044///
3045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
3046#[inline]
3047#[target_feature(enable = "avx512f,avx512vl")]
3048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3049#[cfg_attr(test, assert_instr(vpminsq))]
3050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3051pub const fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
3052    unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
3053}
3054
3055/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3056///
3057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
3058#[inline]
3059#[target_feature(enable = "avx512f,avx512vl")]
3060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3061#[cfg_attr(test, assert_instr(vpminsq))]
3062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3063pub const fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3064    unsafe {
3065        let min = _mm256_min_epi64(a, b).as_i64x4();
3066        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
3067    }
3068}
3069
3070/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3071///
3072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
3073#[inline]
3074#[target_feature(enable = "avx512f,avx512vl")]
3075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3076#[cfg_attr(test, assert_instr(vpminsq))]
3077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3078pub const fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3079    unsafe {
3080        let min = _mm256_min_epi64(a, b).as_i64x4();
3081        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
3082    }
3083}
3084
3085/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3086///
3087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
3088#[inline]
3089#[target_feature(enable = "avx512f,avx512vl")]
3090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3091#[cfg_attr(test, assert_instr(vpminsq))]
3092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3093pub const fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
3094    unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
3095}
3096
3097/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3098///
3099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
3100#[inline]
3101#[target_feature(enable = "avx512f,avx512vl")]
3102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3103#[cfg_attr(test, assert_instr(vpminsq))]
3104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3105pub const fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3106    unsafe {
3107        let min = _mm_min_epi64(a, b).as_i64x2();
3108        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
3109    }
3110}
3111
3112/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3113///
3114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
3115#[inline]
3116#[target_feature(enable = "avx512f,avx512vl")]
3117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3118#[cfg_attr(test, assert_instr(vpminsq))]
3119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3120pub const fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3121    unsafe {
3122        let min = _mm_min_epi64(a, b).as_i64x2();
3123        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
3124    }
3125}
3126
3127/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
3128///
3129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
3130#[inline]
3131#[target_feature(enable = "avx512f")]
3132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3133#[cfg_attr(test, assert_instr(vminps))]
3134pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
3135    unsafe {
3136        transmute(vminps(
3137            a.as_f32x16(),
3138            b.as_f32x16(),
3139            _MM_FROUND_CUR_DIRECTION,
3140        ))
3141    }
3142}
3143
3144/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3145///
3146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
3147#[inline]
3148#[target_feature(enable = "avx512f")]
3149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3150#[cfg_attr(test, assert_instr(vminps))]
3151pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
3152    unsafe {
3153        let min = _mm512_min_ps(a, b).as_f32x16();
3154        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
3155    }
3156}
3157
3158/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3159///
3160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3161#[inline]
3162#[target_feature(enable = "avx512f")]
3163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3164#[cfg_attr(test, assert_instr(vminps))]
3165pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3166    unsafe {
3167        let min = _mm512_min_ps(a, b).as_f32x16();
3168        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
3169    }
3170}
3171
3172/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3173///
3174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3175#[inline]
3176#[target_feature(enable = "avx512f,avx512vl")]
3177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3178#[cfg_attr(test, assert_instr(vminps))]
3179pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3180    unsafe {
3181        let min = _mm256_min_ps(a, b).as_f32x8();
3182        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
3183    }
3184}
3185
3186/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3187///
3188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3189#[inline]
3190#[target_feature(enable = "avx512f,avx512vl")]
3191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3192#[cfg_attr(test, assert_instr(vminps))]
3193pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3194    unsafe {
3195        let min = _mm256_min_ps(a, b).as_f32x8();
3196        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
3197    }
3198}
3199
3200/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3201///
3202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3203#[inline]
3204#[target_feature(enable = "avx512f,avx512vl")]
3205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3206#[cfg_attr(test, assert_instr(vminps))]
3207pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3208    unsafe {
3209        let min = _mm_min_ps(a, b).as_f32x4();
3210        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
3211    }
3212}
3213
3214/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3215///
3216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3217#[inline]
3218#[target_feature(enable = "avx512f,avx512vl")]
3219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3220#[cfg_attr(test, assert_instr(vminps))]
3221pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3222    unsafe {
3223        let min = _mm_min_ps(a, b).as_f32x4();
3224        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
3225    }
3226}
3227
3228/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3229///
3230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3231#[inline]
3232#[target_feature(enable = "avx512f")]
3233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3234#[cfg_attr(test, assert_instr(vminpd))]
3235pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3236    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3237}
3238
3239/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3240///
3241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3242#[inline]
3243#[target_feature(enable = "avx512f")]
3244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3245#[cfg_attr(test, assert_instr(vminpd))]
3246pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3247    unsafe {
3248        let min = _mm512_min_pd(a, b).as_f64x8();
3249        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3250    }
3251}
3252
3253/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3254///
3255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3256#[inline]
3257#[target_feature(enable = "avx512f")]
3258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3259#[cfg_attr(test, assert_instr(vminpd))]
3260pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3261    unsafe {
3262        let min = _mm512_min_pd(a, b).as_f64x8();
3263        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
3264    }
3265}
3266
3267/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3270#[inline]
3271#[target_feature(enable = "avx512f,avx512vl")]
3272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3273#[cfg_attr(test, assert_instr(vminpd))]
3274pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3275    unsafe {
3276        let min = _mm256_min_pd(a, b).as_f64x4();
3277        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
3278    }
3279}
3280
3281/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3282///
3283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3284#[inline]
3285#[target_feature(enable = "avx512f,avx512vl")]
3286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3287#[cfg_attr(test, assert_instr(vminpd))]
3288pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3289    unsafe {
3290        let min = _mm256_min_pd(a, b).as_f64x4();
3291        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
3292    }
3293}
3294
3295/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3296///
3297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3298#[inline]
3299#[target_feature(enable = "avx512f,avx512vl")]
3300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3301#[cfg_attr(test, assert_instr(vminpd))]
3302pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3303    unsafe {
3304        let min = _mm_min_pd(a, b).as_f64x2();
3305        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
3306    }
3307}
3308
3309/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3310///
3311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3312#[inline]
3313#[target_feature(enable = "avx512f,avx512vl")]
3314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3315#[cfg_attr(test, assert_instr(vminpd))]
3316pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3317    unsafe {
3318        let min = _mm_min_pd(a, b).as_f64x2();
3319        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
3320    }
3321}
3322
3323/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3324///
3325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3326#[inline]
3327#[target_feature(enable = "avx512f")]
3328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3329#[cfg_attr(test, assert_instr(vpminud))]
3330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3331pub const fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3332    unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
3333}
3334
3335/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3336///
3337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3338#[inline]
3339#[target_feature(enable = "avx512f")]
3340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3341#[cfg_attr(test, assert_instr(vpminud))]
3342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3343pub const fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3344    unsafe {
3345        let min = _mm512_min_epu32(a, b).as_u32x16();
3346        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3347    }
3348}
3349
3350/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3351///
3352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3353#[inline]
3354#[target_feature(enable = "avx512f")]
3355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3356#[cfg_attr(test, assert_instr(vpminud))]
3357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3358pub const fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3359    unsafe {
3360        let min = _mm512_min_epu32(a, b).as_u32x16();
3361        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
3362    }
3363}
3364
3365/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3366///
3367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3368#[inline]
3369#[target_feature(enable = "avx512f,avx512vl")]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3371#[cfg_attr(test, assert_instr(vpminud))]
3372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3373pub const fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3374    unsafe {
3375        let min = _mm256_min_epu32(a, b).as_u32x8();
3376        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3377    }
3378}
3379
3380/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3381///
3382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3383#[inline]
3384#[target_feature(enable = "avx512f,avx512vl")]
3385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3386#[cfg_attr(test, assert_instr(vpminud))]
3387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3388pub const fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3389    unsafe {
3390        let min = _mm256_min_epu32(a, b).as_u32x8();
3391        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3392    }
3393}
3394
3395/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3396///
3397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3398#[inline]
3399#[target_feature(enable = "avx512f,avx512vl")]
3400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3401#[cfg_attr(test, assert_instr(vpminud))]
3402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3403pub const fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3404    unsafe {
3405        let min = _mm_min_epu32(a, b).as_u32x4();
3406        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3407    }
3408}
3409
3410/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3413#[inline]
3414#[target_feature(enable = "avx512f,avx512vl")]
3415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3416#[cfg_attr(test, assert_instr(vpminud))]
3417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3418pub const fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3419    unsafe {
3420        let min = _mm_min_epu32(a, b).as_u32x4();
3421        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3422    }
3423}
3424
3425/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3426///
3427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3428#[inline]
3429#[target_feature(enable = "avx512f")]
3430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3431#[cfg_attr(test, assert_instr(vpminuq))]
3432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3433pub const fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3434    unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
3435}
3436
3437/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3438///
3439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3440#[inline]
3441#[target_feature(enable = "avx512f")]
3442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3443#[cfg_attr(test, assert_instr(vpminuq))]
3444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3445pub const fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3446    unsafe {
3447        let min = _mm512_min_epu64(a, b).as_u64x8();
3448        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3449    }
3450}
3451
3452/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3453///
3454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3455#[inline]
3456#[target_feature(enable = "avx512f")]
3457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3458#[cfg_attr(test, assert_instr(vpminuq))]
3459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3460pub const fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3461    unsafe {
3462        let min = _mm512_min_epu64(a, b).as_u64x8();
3463        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3464    }
3465}
3466
3467/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3470#[inline]
3471#[target_feature(enable = "avx512f,avx512vl")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vpminuq))]
3474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3475pub const fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3476    unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
3477}
3478
3479/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3480///
3481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3482#[inline]
3483#[target_feature(enable = "avx512f,avx512vl")]
3484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3485#[cfg_attr(test, assert_instr(vpminuq))]
3486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3487pub const fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3488    unsafe {
3489        let min = _mm256_min_epu64(a, b).as_u64x4();
3490        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3491    }
3492}
3493
3494/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3495///
3496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3497#[inline]
3498#[target_feature(enable = "avx512f,avx512vl")]
3499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3500#[cfg_attr(test, assert_instr(vpminuq))]
3501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3502pub const fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3503    unsafe {
3504        let min = _mm256_min_epu64(a, b).as_u64x4();
3505        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3506    }
3507}
3508
3509/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3510///
3511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3512#[inline]
3513#[target_feature(enable = "avx512f,avx512vl")]
3514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3515#[cfg_attr(test, assert_instr(vpminuq))]
3516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3517pub const fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3518    unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
3519}
3520
3521/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3522///
3523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3524#[inline]
3525#[target_feature(enable = "avx512f,avx512vl")]
3526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3527#[cfg_attr(test, assert_instr(vpminuq))]
3528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3529pub const fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3530    unsafe {
3531        let min = _mm_min_epu64(a, b).as_u64x2();
3532        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3533    }
3534}
3535
3536/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3537///
3538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3539#[inline]
3540#[target_feature(enable = "avx512f,avx512vl")]
3541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3542#[cfg_attr(test, assert_instr(vpminuq))]
3543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3544pub const fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3545    unsafe {
3546        let min = _mm_min_epu64(a, b).as_u64x2();
3547        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3548    }
3549}
3550
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    // Element-wise square root via the generic SIMD intrinsic; asserted above to
    // lower to a single `vsqrtps` over the 16 f32 lanes.
    unsafe { simd_fsqrt(a) }
}
3561
3562/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3563///
3564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3565#[inline]
3566#[target_feature(enable = "avx512f")]
3567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3568#[cfg_attr(test, assert_instr(vsqrtps))]
3569pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3570    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3571}
3572
3573/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3574///
3575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3576#[inline]
3577#[target_feature(enable = "avx512f")]
3578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3579#[cfg_attr(test, assert_instr(vsqrtps))]
3580pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3581    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3582}
3583
3584/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3585///
3586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3587#[inline]
3588#[target_feature(enable = "avx512f,avx512vl")]
3589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3590#[cfg_attr(test, assert_instr(vsqrtps))]
3591pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3592    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3593}
3594
3595/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3596///
3597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3598#[inline]
3599#[target_feature(enable = "avx512f,avx512vl")]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3601#[cfg_attr(test, assert_instr(vsqrtps))]
3602pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3603    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3604}
3605
3606/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3607///
3608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3609#[inline]
3610#[target_feature(enable = "avx512f,avx512vl")]
3611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3612#[cfg_attr(test, assert_instr(vsqrtps))]
3613pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3614    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3615}
3616
3617/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3618///
3619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3620#[inline]
3621#[target_feature(enable = "avx512f,avx512vl")]
3622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3623#[cfg_attr(test, assert_instr(vsqrtps))]
3624pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3625    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3626}
3627
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    // Element-wise square root via the generic SIMD intrinsic; asserted above to
    // lower to a single `vsqrtpd` over the 8 f64 lanes.
    unsafe { simd_fsqrt(a) }
}
3638
3639/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3640///
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3642#[inline]
3643#[target_feature(enable = "avx512f")]
3644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3645#[cfg_attr(test, assert_instr(vsqrtpd))]
3646pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3647    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3648}
3649
3650/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3651///
3652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3653#[inline]
3654#[target_feature(enable = "avx512f")]
3655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3656#[cfg_attr(test, assert_instr(vsqrtpd))]
3657pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3658    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3659}
3660
3661/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3662///
3663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3664#[inline]
3665#[target_feature(enable = "avx512f,avx512vl")]
3666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3667#[cfg_attr(test, assert_instr(vsqrtpd))]
3668pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3669    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3670}
3671
3672/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3673///
3674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3675#[inline]
3676#[target_feature(enable = "avx512f,avx512vl")]
3677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3678#[cfg_attr(test, assert_instr(vsqrtpd))]
3679pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3680    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3681}
3682
3683/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3684///
3685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3686#[inline]
3687#[target_feature(enable = "avx512f,avx512vl")]
3688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3689#[cfg_attr(test, assert_instr(vsqrtpd))]
3690pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3691    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3692}
3693
3694/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3695///
3696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3697#[inline]
3698#[target_feature(enable = "avx512f,avx512vl")]
3699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3700#[cfg_attr(test, assert_instr(vsqrtpd))]
3701pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3702    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3703}
3704
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    // Per-lane fused multiply-add `(a * b) + c` via the generic `simd_fma` intrinsic.
    unsafe { simd_fma(a, b, c) }
}
3716
3717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3718///
3719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3720#[inline]
3721#[target_feature(enable = "avx512f")]
3722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3723#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3725pub const fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3726    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3727}
3728
3729/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3730///
3731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3732#[inline]
3733#[target_feature(enable = "avx512f")]
3734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3735#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3737pub const fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3738    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3739}
3740
3741/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3742///
3743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3744#[inline]
3745#[target_feature(enable = "avx512f")]
3746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3747#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3749pub const fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3750    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3751}
3752
3753/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3760#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3761pub const fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3762    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3763}
3764
3765/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3766///
3767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3768#[inline]
3769#[target_feature(enable = "avx512f,avx512vl")]
3770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3771#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3773pub const fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3774    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3775}
3776
3777/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3778///
3779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3780#[inline]
3781#[target_feature(enable = "avx512f,avx512vl")]
3782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3783#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3785pub const fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3786    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3787}
3788
3789/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3790///
3791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3792#[inline]
3793#[target_feature(enable = "avx512f,avx512vl")]
3794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3795#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3797pub const fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3798    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3799}
3800
3801/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3802///
3803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3804#[inline]
3805#[target_feature(enable = "avx512f,avx512vl")]
3806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3807#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3809pub const fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3810    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3811}
3812
3813/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3814///
3815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3816#[inline]
3817#[target_feature(enable = "avx512f,avx512vl")]
3818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3819#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3821pub const fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3822    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3823}
3824
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Per-lane fused multiply-add `(a * b) + c` via the generic `simd_fma` intrinsic.
    unsafe { simd_fma(a, b, c) }
}
3836
3837/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3838///
3839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3840#[inline]
3841#[target_feature(enable = "avx512f")]
3842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3843#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3845pub const fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3846    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3847}
3848
3849/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3850///
3851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3852#[inline]
3853#[target_feature(enable = "avx512f")]
3854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3855#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3856#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3857pub const fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3858    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3859}
3860
3861/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3862///
3863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3864#[inline]
3865#[target_feature(enable = "avx512f")]
3866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3867#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3869pub const fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3870    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3871}
3872
3873/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3874///
3875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3876#[inline]
3877#[target_feature(enable = "avx512f,avx512vl")]
3878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3879#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3881pub const fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3882    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3888#[inline]
3889#[target_feature(enable = "avx512f,avx512vl")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3893pub const fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3894    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3895}
3896
3897/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3898///
3899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3900#[inline]
3901#[target_feature(enable = "avx512f,avx512vl")]
3902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3903#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3905pub const fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3906    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3907}
3908
3909/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3910///
3911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3912#[inline]
3913#[target_feature(enable = "avx512f,avx512vl")]
3914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3915#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3917pub const fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3918    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3919}
3920
3921/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3922///
3923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3924#[inline]
3925#[target_feature(enable = "avx512f,avx512vl")]
3926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3927#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3929pub const fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3930    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3931}
3932
3933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3934///
3935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3936#[inline]
3937#[target_feature(enable = "avx512f,avx512vl")]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3939#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3941pub const fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3942    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3943}
3944
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    // Expressed as fma(a, b, -c): `(a * b) - c` per lane, reusing the fused
    // multiply-add intrinsic with `c` negated.
    unsafe { simd_fma(a, b, simd_neg(c)) }
}
3956
3957/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3958///
3959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3960#[inline]
3961#[target_feature(enable = "avx512f")]
3962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3963#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3965pub const fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3966    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3967}
3968
3969/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3970///
3971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3972#[inline]
3973#[target_feature(enable = "avx512f")]
3974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3975#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3977pub const fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3978    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3979}
3980
3981/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3982///
3983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3984#[inline]
3985#[target_feature(enable = "avx512f")]
3986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3987#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3989pub const fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3990    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3991}
3992
3993/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3994///
3995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3996#[inline]
3997#[target_feature(enable = "avx512f,avx512vl")]
3998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3999#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4001pub const fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4002    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
4003}
4004
4005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4006///
4007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
4008#[inline]
4009#[target_feature(enable = "avx512f,avx512vl")]
4010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4011#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4013pub const fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4014    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
4015}
4016
4017/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4018///
4019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
4020#[inline]
4021#[target_feature(enable = "avx512f,avx512vl")]
4022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4023#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4025pub const fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4026    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
4027}
4028
4029/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4030///
4031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
4032#[inline]
4033#[target_feature(enable = "avx512f,avx512vl")]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4035#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4037pub const fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4038    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
4039}
4040
4041/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4042///
4043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
4044#[inline]
4045#[target_feature(enable = "avx512f,avx512vl")]
4046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4047#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4049pub const fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4050    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
4051}
4052
4053/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4054///
4055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
4056#[inline]
4057#[target_feature(enable = "avx512f,avx512vl")]
4058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4059#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4061pub const fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4062    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
4063}
4064
4065/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
4066///
4067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
4068#[inline]
4069#[target_feature(enable = "avx512f")]
4070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4071#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4073pub const fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4074    unsafe { simd_fma(a, b, simd_neg(c)) }
4075}
4076
4077/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4078///
4079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
4080#[inline]
4081#[target_feature(enable = "avx512f")]
4082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4083#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4085pub const fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4086    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
4087}
4088
4089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4090///
4091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
4092#[inline]
4093#[target_feature(enable = "avx512f")]
4094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4095#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4097pub const fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4098    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
4099}
4100
4101/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4102///
4103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
4104#[inline]
4105#[target_feature(enable = "avx512f")]
4106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4107#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4109pub const fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4110    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
4111}
4112
4113/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4114///
4115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
4116#[inline]
4117#[target_feature(enable = "avx512f,avx512vl")]
4118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4119#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4121pub const fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4122    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
4123}
4124
4125/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4126///
4127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
4128#[inline]
4129#[target_feature(enable = "avx512f,avx512vl")]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4131#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4133pub const fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4134    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
4135}
4136
4137/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4138///
4139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
4140#[inline]
4141#[target_feature(enable = "avx512f,avx512vl")]
4142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4143#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4145pub const fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4146    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
4147}
4148
4149/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4150///
4151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
4152#[inline]
4153#[target_feature(enable = "avx512f,avx512vl")]
4154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4155#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4157pub const fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4158    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4169pub const fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4170    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
4171}
4172
4173/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4174///
4175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
4176#[inline]
4177#[target_feature(enable = "avx512f,avx512vl")]
4178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4179#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4181pub const fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4182    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
4183}
4184
4185/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4186///
4187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
4188#[inline]
4189#[target_feature(enable = "avx512f")]
4190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4191#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4193pub const fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4194    unsafe {
4195        let add = simd_fma(a, b, c);
4196        let sub = simd_fma(a, b, simd_neg(c));
4197        simd_shuffle!(
4198            add,
4199            sub,
4200            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
4201        )
4202    }
4203}
4204
4205/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4208#[inline]
4209#[target_feature(enable = "avx512f")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4213pub const fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4214    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4215}
4216
4217/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4218///
4219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4220#[inline]
4221#[target_feature(enable = "avx512f")]
4222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4223#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4225pub const fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4226    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4227}
4228
4229/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4230///
4231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4232#[inline]
4233#[target_feature(enable = "avx512f")]
4234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4235#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4237pub const fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4238    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4239}
4240
4241/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4242///
4243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4244#[inline]
4245#[target_feature(enable = "avx512f,avx512vl")]
4246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4247#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4249pub const fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4250    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4251}
4252
4253/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4254///
4255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4256#[inline]
4257#[target_feature(enable = "avx512f,avx512vl")]
4258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4259#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4261pub const fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4262    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4263}
4264
4265/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4266///
4267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4268#[inline]
4269#[target_feature(enable = "avx512f,avx512vl")]
4270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4271#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4273pub const fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4274    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4275}
4276
4277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4278///
4279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4280#[inline]
4281#[target_feature(enable = "avx512f,avx512vl")]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4283#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4285pub const fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4286    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4287}
4288
4289/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4290///
4291/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ps&expand=2606)
4292#[inline]
4293#[target_feature(enable = "avx512f,avx512vl")]
4294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4295#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4297pub const fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4298    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4308#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4309pub const fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4310    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4311}
4312
4313/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4314///
4315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4316#[inline]
4317#[target_feature(enable = "avx512f")]
4318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4319#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4321pub const fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4322    unsafe {
4323        let add = simd_fma(a, b, c);
4324        let sub = simd_fma(a, b, simd_neg(c));
4325        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4326    }
4327}
4328
4329/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4330///
4331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4332#[inline]
4333#[target_feature(enable = "avx512f")]
4334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4335#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4337pub const fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4338    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4339}
4340
4341/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4342///
4343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4344#[inline]
4345#[target_feature(enable = "avx512f")]
4346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4347#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4349pub const fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4350    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4351}
4352
4353/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4354///
4355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4356#[inline]
4357#[target_feature(enable = "avx512f")]
4358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4359#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4361pub const fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4362    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4363}
4364
4365/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4366///
4367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4368#[inline]
4369#[target_feature(enable = "avx512f,avx512vl")]
4370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4371#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4373pub const fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4374    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4375}
4376
4377/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4378///
4379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4380#[inline]
4381#[target_feature(enable = "avx512f,avx512vl")]
4382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4383#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4385pub const fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4386    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4387}
4388
4389/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4390///
4391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4392#[inline]
4393#[target_feature(enable = "avx512f,avx512vl")]
4394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4395#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4397pub const fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4398    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4399}
4400
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
}
4412
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
}
4424
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
}
4436
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        let add = simd_fma(a, b, c); // a * b + c
        let sub = simd_fma(a, b, simd_neg(c)); // a * b - c
        // Interleave the two fused results: even lanes take the added value
        // (indices < 16 pick from `add`), odd lanes the subtracted value
        // (indices >= 16 pick from `sub`).
        simd_shuffle!(
            add,
            sub,
            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
        )
    }
}
4456
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
}
4468
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
}
4480
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
}
4492
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
}
4504
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
}
4516
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
}
4528
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
}
4540
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
}
4552
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
}
4564
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add = simd_fma(a, b, c); // a * b + c
        let sub = simd_fma(a, b, simd_neg(c)); // a * b - c
        // Interleave the two fused results: even lanes take the added value
        // (indices < 8 pick from `add`), odd lanes the subtracted value
        // (indices >= 8 pick from `sub`).
        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
    }
}
4580
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
}
4592
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
}
4604
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
}
4616
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
}
4628
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
}
4640
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
}
4652
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
}
4664
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
}
4676
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
}
4688
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    // Single fused operation: -(a * b) + c per lane.
    unsafe { simd_fma(simd_neg(a), b, c) }
}
4700
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
}
4712
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
}
4724
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
}
4736
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
}
4748
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
}
4760
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
}
4772
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
}
4784
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
}
4796
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    // Mask-off lanes keep the corresponding element of `c`.
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
}
4808
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Single fused operation: -(a * b) + c per lane.
    unsafe { simd_fma(simd_neg(a), b, c) }
}
4820
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    // Mask-off lanes keep the corresponding element of `a`.
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
}
4832
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Mask-off lanes are zeroed.
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
}
4844
4845/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4846///
4847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4848#[inline]
4849#[target_feature(enable = "avx512f")]
4850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4851#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4853pub const fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4854    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4855}
4856
4857/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4858///
4859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4860#[inline]
4861#[target_feature(enable = "avx512f,avx512vl")]
4862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4863#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4865pub const fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4866    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4867}
4868
4869/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4870///
4871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4872#[inline]
4873#[target_feature(enable = "avx512f,avx512vl")]
4874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4875#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4877pub const fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4878    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4879}
4880
4881/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4882///
4883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4884#[inline]
4885#[target_feature(enable = "avx512f,avx512vl")]
4886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4887#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4889pub const fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4890    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4891}
4892
4893/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4894///
4895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4896#[inline]
4897#[target_feature(enable = "avx512f,avx512vl")]
4898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4899#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4901pub const fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4902    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4903}
4904
4905/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4906///
4907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4908#[inline]
4909#[target_feature(enable = "avx512f,avx512vl")]
4910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4911#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4913pub const fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4914    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4915}
4916
4917/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4918///
4919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4920#[inline]
4921#[target_feature(enable = "avx512f,avx512vl")]
4922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4923#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4925pub const fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4926    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4927}
4928
4929/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4930///
4931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4932#[inline]
4933#[target_feature(enable = "avx512f")]
4934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4935#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4937pub const fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4938    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4939}
4940
4941/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4942///
4943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4944#[inline]
4945#[target_feature(enable = "avx512f")]
4946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4947#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4949pub const fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4950    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4951}
4952
4953/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4954///
4955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4956#[inline]
4957#[target_feature(enable = "avx512f")]
4958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4959#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4961pub const fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4962    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4963}
4964
4965/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4968#[inline]
4969#[target_feature(enable = "avx512f")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4973pub const fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4974    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4975}
4976
4977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4978///
4979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4980#[inline]
4981#[target_feature(enable = "avx512f,avx512vl")]
4982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4983#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4985pub const fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4986    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4987}
4988
4989/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4990///
4991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4992#[inline]
4993#[target_feature(enable = "avx512f,avx512vl")]
4994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4995#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4997pub const fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4998    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4999}
5000
5001/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5002///
5003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
5004#[inline]
5005#[target_feature(enable = "avx512f,avx512vl")]
5006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5007#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
5010    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
5011}
5012
5013/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5014///
5015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
5016#[inline]
5017#[target_feature(enable = "avx512f,avx512vl")]
5018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5019#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5021pub const fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
5022    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
5023}
5024
5025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5026///
5027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
5028#[inline]
5029#[target_feature(enable = "avx512f,avx512vl")]
5030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5031#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5033pub const fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
5034    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
5035}
5036
5037/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5038///
5039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
5040#[inline]
5041#[target_feature(enable = "avx512f,avx512vl")]
5042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5043#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5045pub const fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
5046    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
5047}
5048
5049/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
5050///
5051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
5052#[inline]
5053#[target_feature(enable = "avx512f")]
5054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5055#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5057pub const fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5058    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
5059}
5060
5061/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5062///
5063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
5064#[inline]
5065#[target_feature(enable = "avx512f")]
5066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5067#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5069pub const fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
5070    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
5071}
5072
5073/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5074///
5075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
5076#[inline]
5077#[target_feature(enable = "avx512f")]
5078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5079#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5081pub const fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5082    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
5083}
5084
5085/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5086///
5087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
5088#[inline]
5089#[target_feature(enable = "avx512f")]
5090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5091#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5093pub const fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
5094    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
5095}
5096
5097/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
5100#[inline]
5101#[target_feature(enable = "avx512f,avx512vl")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5105pub const fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
5106    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
5107}
5108
5109/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5110///
5111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
5112#[inline]
5113#[target_feature(enable = "avx512f,avx512vl")]
5114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5115#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5117pub const fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
5118    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
5119}
5120
5121/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5122///
5123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
5124#[inline]
5125#[target_feature(enable = "avx512f,avx512vl")]
5126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5127#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5129pub const fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
5130    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
5131}
5132
5133/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5134///
5135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
5136#[inline]
5137#[target_feature(enable = "avx512f,avx512vl")]
5138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5139#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5141pub const fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
5142    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
5143}
5144
5145/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5146///
5147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
5148#[inline]
5149#[target_feature(enable = "avx512f,avx512vl")]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5153pub const fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
5154    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
5155}
5156
5157/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5158///
5159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
5160#[inline]
5161#[target_feature(enable = "avx512f,avx512vl")]
5162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5163#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5165pub const fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
5166    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
5167}
5168
5169/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5170///
5171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
5172#[inline]
5173#[target_feature(enable = "avx512f")]
5174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5175#[cfg_attr(test, assert_instr(vrcp14ps))]
5176pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
5177    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5178}
5179
5180/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
5183#[inline]
5184#[target_feature(enable = "avx512f")]
5185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5186#[cfg_attr(test, assert_instr(vrcp14ps))]
5187pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5188    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5189}
5190
5191/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5192///
5193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
5194#[inline]
5195#[target_feature(enable = "avx512f")]
5196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5197#[cfg_attr(test, assert_instr(vrcp14ps))]
5198pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
5199    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5200}
5201
5202/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5203///
5204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
5205#[inline]
5206#[target_feature(enable = "avx512f,avx512vl")]
5207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5208#[cfg_attr(test, assert_instr(vrcp14ps))]
5209pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
5210    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5211}
5212
5213/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5214///
5215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
5216#[inline]
5217#[target_feature(enable = "avx512f,avx512vl")]
5218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5219#[cfg_attr(test, assert_instr(vrcp14ps))]
5220pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5221    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5222}
5223
5224/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5225///
5226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
5227#[inline]
5228#[target_feature(enable = "avx512f,avx512vl")]
5229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5230#[cfg_attr(test, assert_instr(vrcp14ps))]
5231pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
5232    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5233}
5234
5235/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5236///
5237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
5238#[inline]
5239#[target_feature(enable = "avx512f,avx512vl")]
5240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5241#[cfg_attr(test, assert_instr(vrcp14ps))]
5242pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
5243    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5244}
5245
5246/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5247///
5248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
5249#[inline]
5250#[target_feature(enable = "avx512f,avx512vl")]
5251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5252#[cfg_attr(test, assert_instr(vrcp14ps))]
5253pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5254    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5255}
5256
5257/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5258///
5259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
5260#[inline]
5261#[target_feature(enable = "avx512f,avx512vl")]
5262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5263#[cfg_attr(test, assert_instr(vrcp14ps))]
5264pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
5265    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5266}
5267
5268/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5269///
5270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
5271#[inline]
5272#[target_feature(enable = "avx512f")]
5273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5274#[cfg_attr(test, assert_instr(vrcp14pd))]
5275pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
5276    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5277}
5278
5279/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5280///
5281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5282#[inline]
5283#[target_feature(enable = "avx512f")]
5284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5285#[cfg_attr(test, assert_instr(vrcp14pd))]
5286pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5287    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5288}
5289
5290/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5293#[inline]
5294#[target_feature(enable = "avx512f")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[cfg_attr(test, assert_instr(vrcp14pd))]
5297pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5298    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5299}
5300
5301/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5302///
5303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5304#[inline]
5305#[target_feature(enable = "avx512f,avx512vl")]
5306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5307#[cfg_attr(test, assert_instr(vrcp14pd))]
5308pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5309    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5310}
5311
5312/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5313///
5314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5315#[inline]
5316#[target_feature(enable = "avx512f,avx512vl")]
5317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5318#[cfg_attr(test, assert_instr(vrcp14pd))]
5319pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5320    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5321}
5322
5323/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5324///
5325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5326#[inline]
5327#[target_feature(enable = "avx512f,avx512vl")]
5328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5329#[cfg_attr(test, assert_instr(vrcp14pd))]
5330pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5331    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5332}
5333
5334/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5335///
5336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5337#[inline]
5338#[target_feature(enable = "avx512f,avx512vl")]
5339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5340#[cfg_attr(test, assert_instr(vrcp14pd))]
5341pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5342    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5343}
5344
5345/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5346///
5347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5348#[inline]
5349#[target_feature(enable = "avx512f,avx512vl")]
5350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5351#[cfg_attr(test, assert_instr(vrcp14pd))]
5352pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5353    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5354}
5355
5356/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5357///
5358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5359#[inline]
5360#[target_feature(enable = "avx512f,avx512vl")]
5361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5362#[cfg_attr(test, assert_instr(vrcp14pd))]
5363pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5364    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5365}
5366
5367/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5368///
5369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5370#[inline]
5371#[target_feature(enable = "avx512f")]
5372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5373#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5374pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5375    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5376}
5377
5378/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5379///
5380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5381#[inline]
5382#[target_feature(enable = "avx512f")]
5383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5384#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5385pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5386    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5387}
5388
5389/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5390///
5391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5392#[inline]
5393#[target_feature(enable = "avx512f")]
5394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5395#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5396pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5397    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5398}
5399
5400/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5401///
5402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5403#[inline]
5404#[target_feature(enable = "avx512f,avx512vl")]
5405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5406#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5407pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5408    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5409}
5410
5411/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5412///
5413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5414#[inline]
5415#[target_feature(enable = "avx512f,avx512vl")]
5416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5417#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5418pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5419    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5420}
5421
5422/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5425#[inline]
5426#[target_feature(enable = "avx512f,avx512vl")]
5427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5428#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5429pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5430    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5431}
5432
5433/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5434///
5435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5436#[inline]
5437#[target_feature(enable = "avx512f,avx512vl")]
5438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5439#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5440pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5441    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5442}
5443
5444/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5445///
5446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5447#[inline]
5448#[target_feature(enable = "avx512f,avx512vl")]
5449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5450#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5451pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5452    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5453}
5454
5455/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5456///
5457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5458#[inline]
5459#[target_feature(enable = "avx512f,avx512vl")]
5460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5461#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5462pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5463    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5464}
5465
5466/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5467///
5468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5469#[inline]
5470#[target_feature(enable = "avx512f")]
5471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5472#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5473pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5474    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5475}
5476
5477/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5478///
5479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5480#[inline]
5481#[target_feature(enable = "avx512f")]
5482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5483#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5484pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5485    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5486}
5487
5488/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5489///
5490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5491#[inline]
5492#[target_feature(enable = "avx512f")]
5493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5494#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5495pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5496    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5497}
5498
5499/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5500///
5501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5502#[inline]
5503#[target_feature(enable = "avx512f,avx512vl")]
5504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5505#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5506pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5507    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5508}
5509
5510/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5511///
5512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5513#[inline]
5514#[target_feature(enable = "avx512f,avx512vl")]
5515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5516#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5517pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5518    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5519}
5520
5521/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5522///
5523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5524#[inline]
5525#[target_feature(enable = "avx512f,avx512vl")]
5526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5527#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5528pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5529    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5530}
5531
5532/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5533///
5534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5535#[inline]
5536#[target_feature(enable = "avx512f,avx512vl")]
5537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5538#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5539pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5540    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5541}
5542
5543/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5544///
5545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5546#[inline]
5547#[target_feature(enable = "avx512f,avx512vl")]
5548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5549#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5550pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5551    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5552}
5553
5554/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5555///
5556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5557#[inline]
5558#[target_feature(enable = "avx512f,avx512vl")]
5559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5560#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5561pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5562    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5563}
5564
5565/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5566///
5567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5568#[inline]
5569#[target_feature(enable = "avx512f")]
5570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5571#[cfg_attr(test, assert_instr(vgetexpps))]
5572pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5573    unsafe {
5574        transmute(vgetexpps(
5575            a.as_f32x16(),
5576            f32x16::ZERO,
5577            0b11111111_11111111,
5578            _MM_FROUND_CUR_DIRECTION,
5579        ))
5580    }
5581}
5582
5583/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[cfg_attr(test, assert_instr(vgetexpps))]
5590pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5591    unsafe {
5592        transmute(vgetexpps(
5593            a.as_f32x16(),
5594            src.as_f32x16(),
5595            k,
5596            _MM_FROUND_CUR_DIRECTION,
5597        ))
5598    }
5599}
5600
5601/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5602///
5603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5604#[inline]
5605#[target_feature(enable = "avx512f")]
5606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5607#[cfg_attr(test, assert_instr(vgetexpps))]
5608pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5609    unsafe {
5610        transmute(vgetexpps(
5611            a.as_f32x16(),
5612            f32x16::ZERO,
5613            k,
5614            _MM_FROUND_CUR_DIRECTION,
5615        ))
5616    }
5617}
5618
5619/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5620///
5621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5622#[inline]
5623#[target_feature(enable = "avx512f,avx512vl")]
5624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5625#[cfg_attr(test, assert_instr(vgetexpps))]
5626pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5627    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5628}
5629
5630/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5631///
5632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5633#[inline]
5634#[target_feature(enable = "avx512f,avx512vl")]
5635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5636#[cfg_attr(test, assert_instr(vgetexpps))]
5637pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5639}
5640
5641/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5642///
5643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5644#[inline]
5645#[target_feature(enable = "avx512f,avx512vl")]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647#[cfg_attr(test, assert_instr(vgetexpps))]
5648pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5649    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5650}
5651
5652/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5653///
5654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5655#[inline]
5656#[target_feature(enable = "avx512f,avx512vl")]
5657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5658#[cfg_attr(test, assert_instr(vgetexpps))]
5659pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5660    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5661}
5662
5663/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5664///
5665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5666#[inline]
5667#[target_feature(enable = "avx512f,avx512vl")]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669#[cfg_attr(test, assert_instr(vgetexpps))]
5670pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5671    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5672}
5673
5674/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5675///
5676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5677#[inline]
5678#[target_feature(enable = "avx512f,avx512vl")]
5679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5680#[cfg_attr(test, assert_instr(vgetexpps))]
5681pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5682    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5683}
5684
5685/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5686///
5687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5688#[inline]
5689#[target_feature(enable = "avx512f")]
5690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5691#[cfg_attr(test, assert_instr(vgetexppd))]
5692pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5693    unsafe {
5694        transmute(vgetexppd(
5695            a.as_f64x8(),
5696            f64x8::ZERO,
5697            0b11111111,
5698            _MM_FROUND_CUR_DIRECTION,
5699        ))
5700    }
5701}
5702
5703/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5704///
5705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5706#[inline]
5707#[target_feature(enable = "avx512f")]
5708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5709#[cfg_attr(test, assert_instr(vgetexppd))]
5710pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5711    unsafe {
5712        transmute(vgetexppd(
5713            a.as_f64x8(),
5714            src.as_f64x8(),
5715            k,
5716            _MM_FROUND_CUR_DIRECTION,
5717        ))
5718    }
5719}
5720
5721/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5722///
5723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5724#[inline]
5725#[target_feature(enable = "avx512f")]
5726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5727#[cfg_attr(test, assert_instr(vgetexppd))]
5728pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5729    unsafe {
5730        transmute(vgetexppd(
5731            a.as_f64x8(),
5732            f64x8::ZERO,
5733            k,
5734            _MM_FROUND_CUR_DIRECTION,
5735        ))
5736    }
5737}
5738
5739/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5740///
5741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5742#[inline]
5743#[target_feature(enable = "avx512f,avx512vl")]
5744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5745#[cfg_attr(test, assert_instr(vgetexppd))]
5746pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5747    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5748}
5749
5750/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5751///
5752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5753#[inline]
5754#[target_feature(enable = "avx512f,avx512vl")]
5755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5756#[cfg_attr(test, assert_instr(vgetexppd))]
5757pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5758    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5759}
5760
5761/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5762///
5763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5764#[inline]
5765#[target_feature(enable = "avx512f,avx512vl")]
5766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5767#[cfg_attr(test, assert_instr(vgetexppd))]
5768pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5769    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5770}
5771
5772/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5773///
5774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5775#[inline]
5776#[target_feature(enable = "avx512f,avx512vl")]
5777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5778#[cfg_attr(test, assert_instr(vgetexppd))]
5779pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5780    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5781}
5782
5783/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5784///
5785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5786#[inline]
5787#[target_feature(enable = "avx512f,avx512vl")]
5788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5789#[cfg_attr(test, assert_instr(vgetexppd))]
5790pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5791    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5792}
5793
5794/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5795///
5796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5797#[inline]
5798#[target_feature(enable = "avx512f,avx512vl")]
5799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5800#[cfg_attr(test, assert_instr(vgetexppd))]
5801pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5802    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5803}
5804
5805/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5806/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5807/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5808/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5809/// * [`_MM_FROUND_TO_POS_INF`] : round up
5810/// * [`_MM_FROUND_TO_ZERO`] : truncate
5811/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5812///
5813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5814#[inline]
5815#[target_feature(enable = "avx512f")]
5816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5817#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5818#[rustc_legacy_const_generics(1)]
5819pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5820    unsafe {
5821        static_assert_uimm_bits!(IMM8, 8);
5822        let a = a.as_f32x16();
5823        let r = vrndscaleps(
5824            a,
5825            IMM8,
5826            f32x16::ZERO,
5827            0b11111111_11111111,
5828            _MM_FROUND_CUR_DIRECTION,
5829        );
5830        transmute(r)
5831    }
5832}
5833
5834/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5835/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5836/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5837/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5838/// * [`_MM_FROUND_TO_POS_INF`] : round up
5839/// * [`_MM_FROUND_TO_ZERO`] : truncate
5840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5841///
5842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5843#[inline]
5844#[target_feature(enable = "avx512f")]
5845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5846#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5847#[rustc_legacy_const_generics(3)]
5848pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5849    unsafe {
5850        static_assert_uimm_bits!(IMM8, 8);
5851        let a = a.as_f32x16();
5852        let src = src.as_f32x16();
5853        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5854        transmute(r)
5855    }
5856}
5857
5858/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5859/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5860/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5861/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5862/// * [`_MM_FROUND_TO_POS_INF`] : round up
5863/// * [`_MM_FROUND_TO_ZERO`] : truncate
5864/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5865///
5866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5867#[inline]
5868#[target_feature(enable = "avx512f")]
5869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5870#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5871#[rustc_legacy_const_generics(2)]
5872pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5873    unsafe {
5874        static_assert_uimm_bits!(IMM8, 8);
5875        let a = a.as_f32x16();
5876        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5877        transmute(r)
5878    }
5879}
5880
5881/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5882/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5883/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5884/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5885/// * [`_MM_FROUND_TO_POS_INF`] : round up
5886/// * [`_MM_FROUND_TO_ZERO`] : truncate
5887/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5888///
5889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5890#[inline]
5891#[target_feature(enable = "avx512f,avx512vl")]
5892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5893#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5894#[rustc_legacy_const_generics(1)]
5895pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5896    unsafe {
5897        static_assert_uimm_bits!(IMM8, 8);
5898        let a = a.as_f32x8();
5899        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5900        transmute(r)
5901    }
5902}
5903
5904/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5905/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5906/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5907/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5908/// * [`_MM_FROUND_TO_POS_INF`] : round up
5909/// * [`_MM_FROUND_TO_ZERO`] : truncate
5910/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5911///
5912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5913#[inline]
5914#[target_feature(enable = "avx512f,avx512vl")]
5915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5916#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5917#[rustc_legacy_const_generics(3)]
5918pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5919    unsafe {
5920        static_assert_uimm_bits!(IMM8, 8);
5921        let a = a.as_f32x8();
5922        let src = src.as_f32x8();
5923        let r = vrndscaleps256(a, IMM8, src, k);
5924        transmute(r)
5925    }
5926}
5927
5928/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5929/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5930/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5931/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5932/// * [`_MM_FROUND_TO_POS_INF`] : round up
5933/// * [`_MM_FROUND_TO_ZERO`] : truncate
5934/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5935///
5936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5937#[inline]
5938#[target_feature(enable = "avx512f,avx512vl")]
5939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5940#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5941#[rustc_legacy_const_generics(2)]
5942pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5943    unsafe {
5944        static_assert_uimm_bits!(IMM8, 8);
5945        let a = a.as_f32x8();
5946        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5947        transmute(r)
5948    }
5949}
5950
5951/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5952/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5953/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5954/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5955/// * [`_MM_FROUND_TO_POS_INF`] : round up
5956/// * [`_MM_FROUND_TO_ZERO`] : truncate
5957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5958///
5959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5960#[inline]
5961#[target_feature(enable = "avx512f,avx512vl")]
5962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5963#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5964#[rustc_legacy_const_generics(1)]
5965pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5966    unsafe {
5967        static_assert_uimm_bits!(IMM8, 8);
5968        let a = a.as_f32x4();
5969        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5970        transmute(r)
5971    }
5972}
5973
5974/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5975/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5976/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5977/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5978/// * [`_MM_FROUND_TO_POS_INF`] : round up
5979/// * [`_MM_FROUND_TO_ZERO`] : truncate
5980/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5981///
5982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5983#[inline]
5984#[target_feature(enable = "avx512f,avx512vl")]
5985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5986#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5987#[rustc_legacy_const_generics(3)]
5988pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5989    unsafe {
5990        static_assert_uimm_bits!(IMM8, 8);
5991        let a = a.as_f32x4();
5992        let src = src.as_f32x4();
5993        let r = vrndscaleps128(a, IMM8, src, k);
5994        transmute(r)
5995    }
5996}
5997
5998/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5999/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6000/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6001/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6002/// * [`_MM_FROUND_TO_POS_INF`] : round up
6003/// * [`_MM_FROUND_TO_ZERO`] : truncate
6004/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6005///
6006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
6007#[inline]
6008#[target_feature(enable = "avx512f,avx512vl")]
6009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6010#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
6011#[rustc_legacy_const_generics(2)]
6012pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6013    unsafe {
6014        static_assert_uimm_bits!(IMM8, 8);
6015        let a = a.as_f32x4();
6016        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
6017        transmute(r)
6018    }
6019}
6020
6021/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6022/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6023/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6024/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6025/// * [`_MM_FROUND_TO_POS_INF`] : round up
6026/// * [`_MM_FROUND_TO_ZERO`] : truncate
6027/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6028///
6029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
6030#[inline]
6031#[target_feature(enable = "avx512f")]
6032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6033#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6034#[rustc_legacy_const_generics(1)]
6035pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6036    unsafe {
6037        static_assert_uimm_bits!(IMM8, 8);
6038        let a = a.as_f64x8();
6039        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6040        transmute(r)
6041    }
6042}
6043
6044/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6045/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6046/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6047/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6048/// * [`_MM_FROUND_TO_POS_INF`] : round up
6049/// * [`_MM_FROUND_TO_ZERO`] : truncate
6050/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6051///
6052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
6053#[inline]
6054#[target_feature(enable = "avx512f")]
6055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6056#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6057#[rustc_legacy_const_generics(3)]
6058pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
6059    src: __m512d,
6060    k: __mmask8,
6061    a: __m512d,
6062) -> __m512d {
6063    unsafe {
6064        static_assert_uimm_bits!(IMM8, 8);
6065        let a = a.as_f64x8();
6066        let src = src.as_f64x8();
6067        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
6068        transmute(r)
6069    }
6070}
6071
6072/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6073/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6074/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6075/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6076/// * [`_MM_FROUND_TO_POS_INF`] : round up
6077/// * [`_MM_FROUND_TO_ZERO`] : truncate
6078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6079///
6080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
6081#[inline]
6082#[target_feature(enable = "avx512f")]
6083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6084#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6085#[rustc_legacy_const_generics(2)]
6086pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6087    unsafe {
6088        static_assert_uimm_bits!(IMM8, 8);
6089        let a = a.as_f64x8();
6090        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
6091        transmute(r)
6092    }
6093}
6094
6095/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6096/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6097/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6098/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6099/// * [`_MM_FROUND_TO_POS_INF`] : round up
6100/// * [`_MM_FROUND_TO_ZERO`] : truncate
6101/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6102///
6103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
6104#[inline]
6105#[target_feature(enable = "avx512f,avx512vl")]
6106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6107#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6108#[rustc_legacy_const_generics(1)]
6109pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6110    unsafe {
6111        static_assert_uimm_bits!(IMM8, 8);
6112        let a = a.as_f64x4();
6113        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
6114        transmute(r)
6115    }
6116}
6117
6118/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6119/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6120/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6121/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6122/// * [`_MM_FROUND_TO_POS_INF`] : round up
6123/// * [`_MM_FROUND_TO_ZERO`] : truncate
6124/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6125///
6126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
6127#[inline]
6128#[target_feature(enable = "avx512f,avx512vl")]
6129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6130#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6131#[rustc_legacy_const_generics(3)]
6132pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
6133    src: __m256d,
6134    k: __mmask8,
6135    a: __m256d,
6136) -> __m256d {
6137    unsafe {
6138        static_assert_uimm_bits!(IMM8, 8);
6139        let a = a.as_f64x4();
6140        let src = src.as_f64x4();
6141        let r = vrndscalepd256(a, IMM8, src, k);
6142        transmute(r)
6143    }
6144}
6145
6146/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6147/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6148/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6149/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6150/// * [`_MM_FROUND_TO_POS_INF`] : round up
6151/// * [`_MM_FROUND_TO_ZERO`] : truncate
6152/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6159#[rustc_legacy_const_generics(2)]
6160pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6161    unsafe {
6162        static_assert_uimm_bits!(IMM8, 8);
6163        let a = a.as_f64x4();
6164        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
6165        transmute(r)
6166    }
6167}
6168
6169/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6170/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6171/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6172/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6173/// * [`_MM_FROUND_TO_POS_INF`] : round up
6174/// * [`_MM_FROUND_TO_ZERO`] : truncate
6175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6176///
6177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
6178#[inline]
6179#[target_feature(enable = "avx512f,avx512vl")]
6180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6181#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6182#[rustc_legacy_const_generics(1)]
6183pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6184    unsafe {
6185        static_assert_uimm_bits!(IMM8, 8);
6186        let a = a.as_f64x2();
6187        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
6188        transmute(r)
6189    }
6190}
6191
6192/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6193/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6194/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6195/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6196/// * [`_MM_FROUND_TO_POS_INF`] : round up
6197/// * [`_MM_FROUND_TO_ZERO`] : truncate
6198/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6199///
6200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
6201#[inline]
6202#[target_feature(enable = "avx512f,avx512vl")]
6203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6204#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6205#[rustc_legacy_const_generics(3)]
6206pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6207    unsafe {
6208        static_assert_uimm_bits!(IMM8, 8);
6209        let a = a.as_f64x2();
6210        let src = src.as_f64x2();
6211        let r = vrndscalepd128(a, IMM8, src, k);
6212        transmute(r)
6213    }
6214}
6215
6216/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6217/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6218/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6219/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6220/// * [`_MM_FROUND_TO_POS_INF`] : round up
6221/// * [`_MM_FROUND_TO_ZERO`] : truncate
6222/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6223///
6224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
6225#[inline]
6226#[target_feature(enable = "avx512f,avx512vl")]
6227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6228#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6229#[rustc_legacy_const_generics(2)]
6230pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6231    unsafe {
6232        static_assert_uimm_bits!(IMM8, 8);
6233        let a = a.as_f64x2();
6234        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
6235        transmute(r)
6236    }
6237}
6238
6239/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6240///
6241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
6242#[inline]
6243#[target_feature(enable = "avx512f")]
6244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6245#[cfg_attr(test, assert_instr(vscalefps))]
6246pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
6247    unsafe {
6248        transmute(vscalefps(
6249            a.as_f32x16(),
6250            b.as_f32x16(),
6251            f32x16::ZERO,
6252            0b11111111_11111111,
6253            _MM_FROUND_CUR_DIRECTION,
6254        ))
6255    }
6256}
6257
6258/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6259///
6260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
6261#[inline]
6262#[target_feature(enable = "avx512f")]
6263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6264#[cfg_attr(test, assert_instr(vscalefps))]
6265pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
6266    unsafe {
6267        transmute(vscalefps(
6268            a.as_f32x16(),
6269            b.as_f32x16(),
6270            src.as_f32x16(),
6271            k,
6272            _MM_FROUND_CUR_DIRECTION,
6273        ))
6274    }
6275}
6276
6277/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6278///
6279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
6280#[inline]
6281#[target_feature(enable = "avx512f")]
6282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6283#[cfg_attr(test, assert_instr(vscalefps))]
6284pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6285    unsafe {
6286        transmute(vscalefps(
6287            a.as_f32x16(),
6288            b.as_f32x16(),
6289            f32x16::ZERO,
6290            k,
6291            _MM_FROUND_CUR_DIRECTION,
6292        ))
6293    }
6294}
6295
6296/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6297///
6298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6299#[inline]
6300#[target_feature(enable = "avx512f,avx512vl")]
6301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6302#[cfg_attr(test, assert_instr(vscalefps))]
6303pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6304    unsafe {
6305        transmute(vscalefps256(
6306            a.as_f32x8(),
6307            b.as_f32x8(),
6308            f32x8::ZERO,
6309            0b11111111,
6310        ))
6311    }
6312}
6313
6314/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6315///
6316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6317#[inline]
6318#[target_feature(enable = "avx512f,avx512vl")]
6319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6320#[cfg_attr(test, assert_instr(vscalefps))]
6321pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6322    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6323}
6324
6325/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6326///
6327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6328#[inline]
6329#[target_feature(enable = "avx512f,avx512vl")]
6330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6331#[cfg_attr(test, assert_instr(vscalefps))]
6332pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6333    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6334}
6335
6336/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6337///
6338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6339#[inline]
6340#[target_feature(enable = "avx512f,avx512vl")]
6341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6342#[cfg_attr(test, assert_instr(vscalefps))]
6343pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6344    unsafe {
6345        transmute(vscalefps128(
6346            a.as_f32x4(),
6347            b.as_f32x4(),
6348            f32x4::ZERO,
6349            0b00001111,
6350        ))
6351    }
6352}
6353
6354/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6357#[inline]
6358#[target_feature(enable = "avx512f,avx512vl")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vscalefps))]
6361pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6362    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6363}
6364
6365/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6366///
6367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6368#[inline]
6369#[target_feature(enable = "avx512f,avx512vl")]
6370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6371#[cfg_attr(test, assert_instr(vscalefps))]
6372pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6373    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6374}
6375
6376/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6377///
6378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6379#[inline]
6380#[target_feature(enable = "avx512f")]
6381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6382#[cfg_attr(test, assert_instr(vscalefpd))]
6383pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6384    unsafe {
6385        transmute(vscalefpd(
6386            a.as_f64x8(),
6387            b.as_f64x8(),
6388            f64x8::ZERO,
6389            0b11111111,
6390            _MM_FROUND_CUR_DIRECTION,
6391        ))
6392    }
6393}
6394
6395/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6396///
6397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6398#[inline]
6399#[target_feature(enable = "avx512f")]
6400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6401#[cfg_attr(test, assert_instr(vscalefpd))]
6402pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6403    unsafe {
6404        transmute(vscalefpd(
6405            a.as_f64x8(),
6406            b.as_f64x8(),
6407            src.as_f64x8(),
6408            k,
6409            _MM_FROUND_CUR_DIRECTION,
6410        ))
6411    }
6412}
6413
6414/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6415///
6416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6417#[inline]
6418#[target_feature(enable = "avx512f")]
6419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6420#[cfg_attr(test, assert_instr(vscalefpd))]
6421pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6422    unsafe {
6423        transmute(vscalefpd(
6424            a.as_f64x8(),
6425            b.as_f64x8(),
6426            f64x8::ZERO,
6427            k,
6428            _MM_FROUND_CUR_DIRECTION,
6429        ))
6430    }
6431}
6432
6433/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6436#[inline]
6437#[target_feature(enable = "avx512f,avx512vl")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vscalefpd))]
6440pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6441    unsafe {
6442        transmute(vscalefpd256(
6443            a.as_f64x4(),
6444            b.as_f64x4(),
6445            f64x4::ZERO,
6446            0b00001111,
6447        ))
6448    }
6449}
6450
6451/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6452///
6453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6454#[inline]
6455#[target_feature(enable = "avx512f,avx512vl")]
6456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6457#[cfg_attr(test, assert_instr(vscalefpd))]
6458pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6459    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6460}
6461
6462/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6463///
6464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6465#[inline]
6466#[target_feature(enable = "avx512f,avx512vl")]
6467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6468#[cfg_attr(test, assert_instr(vscalefpd))]
6469pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6470    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6471}
6472
6473/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6474///
6475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6476#[inline]
6477#[target_feature(enable = "avx512f,avx512vl")]
6478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6479#[cfg_attr(test, assert_instr(vscalefpd))]
6480pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6481    unsafe {
6482        transmute(vscalefpd128(
6483            a.as_f64x2(),
6484            b.as_f64x2(),
6485            f64x2::ZERO,
6486            0b00000011,
6487        ))
6488    }
6489}
6490
6491/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6494#[inline]
6495#[target_feature(enable = "avx512f,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vscalefpd))]
6498pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6499    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6500}
6501
6502/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6503///
6504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6505#[inline]
6506#[target_feature(enable = "avx512f,avx512vl")]
6507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6508#[cfg_attr(test, assert_instr(vscalefpd))]
6509pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6510    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6511}
6512
6513/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6514///
6515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6516#[inline]
6517#[target_feature(enable = "avx512f")]
6518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6519#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6520#[rustc_legacy_const_generics(3)]
6521pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6522    unsafe {
6523        static_assert_uimm_bits!(IMM8, 8);
6524        let a = a.as_f32x16();
6525        let b = b.as_f32x16();
6526        let c = c.as_i32x16();
6527        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6528        transmute(r)
6529    }
6530}
6531
6532/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6533///
6534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6535#[inline]
6536#[target_feature(enable = "avx512f")]
6537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6538#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6539#[rustc_legacy_const_generics(4)]
6540pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6541    a: __m512,
6542    k: __mmask16,
6543    b: __m512,
6544    c: __m512i,
6545) -> __m512 {
6546    unsafe {
6547        static_assert_uimm_bits!(IMM8, 8);
6548        let a = a.as_f32x16();
6549        let b = b.as_f32x16();
6550        let c = c.as_i32x16();
6551        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6552        transmute(r)
6553    }
6554}
6555
6556/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6557///
6558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6559#[inline]
6560#[target_feature(enable = "avx512f")]
6561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6562#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6563#[rustc_legacy_const_generics(4)]
6564pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6565    k: __mmask16,
6566    a: __m512,
6567    b: __m512,
6568    c: __m512i,
6569) -> __m512 {
6570    unsafe {
6571        static_assert_uimm_bits!(IMM8, 8);
6572        let a = a.as_f32x16();
6573        let b = b.as_f32x16();
6574        let c = c.as_i32x16();
6575        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6576        transmute(r)
6577    }
6578}
6579
6580/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6581///
6582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6583#[inline]
6584#[target_feature(enable = "avx512f,avx512vl")]
6585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6586#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6587#[rustc_legacy_const_generics(3)]
6588pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6589    unsafe {
6590        static_assert_uimm_bits!(IMM8, 8);
6591        let a = a.as_f32x8();
6592        let b = b.as_f32x8();
6593        let c = c.as_i32x8();
6594        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6595        transmute(r)
6596    }
6597}
6598
6599/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6600///
6601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6602#[inline]
6603#[target_feature(enable = "avx512f,avx512vl")]
6604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6605#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6606#[rustc_legacy_const_generics(4)]
6607pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6608    a: __m256,
6609    k: __mmask8,
6610    b: __m256,
6611    c: __m256i,
6612) -> __m256 {
6613    unsafe {
6614        static_assert_uimm_bits!(IMM8, 8);
6615        let a = a.as_f32x8();
6616        let b = b.as_f32x8();
6617        let c = c.as_i32x8();
6618        let r = vfixupimmps256(a, b, c, IMM8, k);
6619        transmute(r)
6620    }
6621}
6622
6623/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6624///
6625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6626#[inline]
6627#[target_feature(enable = "avx512f,avx512vl")]
6628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6629#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6630#[rustc_legacy_const_generics(4)]
6631pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6632    k: __mmask8,
6633    a: __m256,
6634    b: __m256,
6635    c: __m256i,
6636) -> __m256 {
6637    unsafe {
6638        static_assert_uimm_bits!(IMM8, 8);
6639        let a = a.as_f32x8();
6640        let b = b.as_f32x8();
6641        let c = c.as_i32x8();
6642        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6643        transmute(r)
6644    }
6645}
6646
6647/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6648///
6649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6650#[inline]
6651#[target_feature(enable = "avx512f,avx512vl")]
6652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6653#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6654#[rustc_legacy_const_generics(3)]
6655pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6656    unsafe {
6657        static_assert_uimm_bits!(IMM8, 8);
6658        let a = a.as_f32x4();
6659        let b = b.as_f32x4();
6660        let c = c.as_i32x4();
6661        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6662        transmute(r)
6663    }
6664}
6665
6666/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6667///
6668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6669#[inline]
6670#[target_feature(enable = "avx512f,avx512vl")]
6671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6672#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6673#[rustc_legacy_const_generics(4)]
6674pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6675    a: __m128,
6676    k: __mmask8,
6677    b: __m128,
6678    c: __m128i,
6679) -> __m128 {
6680    unsafe {
6681        static_assert_uimm_bits!(IMM8, 8);
6682        let a = a.as_f32x4();
6683        let b = b.as_f32x4();
6684        let c = c.as_i32x4();
6685        let r = vfixupimmps128(a, b, c, IMM8, k);
6686        transmute(r)
6687    }
6688}
6689
6690/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6691///
6692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6693#[inline]
6694#[target_feature(enable = "avx512f,avx512vl")]
6695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6696#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6697#[rustc_legacy_const_generics(4)]
6698pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6699    k: __mmask8,
6700    a: __m128,
6701    b: __m128,
6702    c: __m128i,
6703) -> __m128 {
6704    unsafe {
6705        static_assert_uimm_bits!(IMM8, 8);
6706        let a = a.as_f32x4();
6707        let b = b.as_f32x4();
6708        let c = c.as_i32x4();
6709        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6710        transmute(r)
6711    }
6712}
6713
6714/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6715///
6716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6717#[inline]
6718#[target_feature(enable = "avx512f")]
6719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6720#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6721#[rustc_legacy_const_generics(3)]
6722pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6723    unsafe {
6724        static_assert_uimm_bits!(IMM8, 8);
6725        let a = a.as_f64x8();
6726        let b = b.as_f64x8();
6727        let c = c.as_i64x8();
6728        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6729        transmute(r)
6730    }
6731}
6732
6733/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6734///
6735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6736#[inline]
6737#[target_feature(enable = "avx512f")]
6738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6739#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6740#[rustc_legacy_const_generics(4)]
6741pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6742    a: __m512d,
6743    k: __mmask8,
6744    b: __m512d,
6745    c: __m512i,
6746) -> __m512d {
6747    unsafe {
6748        static_assert_uimm_bits!(IMM8, 8);
6749        let a = a.as_f64x8();
6750        let b = b.as_f64x8();
6751        let c = c.as_i64x8();
6752        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6753        transmute(r)
6754    }
6755}
6756
6757/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6758///
6759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6760#[inline]
6761#[target_feature(enable = "avx512f")]
6762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6763#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6764#[rustc_legacy_const_generics(4)]
6765pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6766    k: __mmask8,
6767    a: __m512d,
6768    b: __m512d,
6769    c: __m512i,
6770) -> __m512d {
6771    unsafe {
6772        static_assert_uimm_bits!(IMM8, 8);
6773        let a = a.as_f64x8();
6774        let b = b.as_f64x8();
6775        let c = c.as_i64x8();
6776        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6777        transmute(r)
6778    }
6779}
6780
6781/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6782///
6783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6784#[inline]
6785#[target_feature(enable = "avx512f,avx512vl")]
6786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6787#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6788#[rustc_legacy_const_generics(3)]
6789pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6790    unsafe {
6791        static_assert_uimm_bits!(IMM8, 8);
6792        let a = a.as_f64x4();
6793        let b = b.as_f64x4();
6794        let c = c.as_i64x4();
6795        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6796        transmute(r)
6797    }
6798}
6799
6800/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6801///
6802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6803#[inline]
6804#[target_feature(enable = "avx512f,avx512vl")]
6805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6806#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6807#[rustc_legacy_const_generics(4)]
6808pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6809    a: __m256d,
6810    k: __mmask8,
6811    b: __m256d,
6812    c: __m256i,
6813) -> __m256d {
6814    unsafe {
6815        static_assert_uimm_bits!(IMM8, 8);
6816        let a = a.as_f64x4();
6817        let b = b.as_f64x4();
6818        let c = c.as_i64x4();
6819        let r = vfixupimmpd256(a, b, c, IMM8, k);
6820        transmute(r)
6821    }
6822}
6823
6824/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6825///
6826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6827#[inline]
6828#[target_feature(enable = "avx512f,avx512vl")]
6829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6830#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6831#[rustc_legacy_const_generics(4)]
6832pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6833    k: __mmask8,
6834    a: __m256d,
6835    b: __m256d,
6836    c: __m256i,
6837) -> __m256d {
6838    unsafe {
6839        static_assert_uimm_bits!(IMM8, 8);
6840        let a = a.as_f64x4();
6841        let b = b.as_f64x4();
6842        let c = c.as_i64x4();
6843        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6844        transmute(r)
6845    }
6846}
6847
6848/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6849///
6850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6851#[inline]
6852#[target_feature(enable = "avx512f,avx512vl")]
6853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6854#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6855#[rustc_legacy_const_generics(3)]
6856pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6857    unsafe {
6858        static_assert_uimm_bits!(IMM8, 8);
6859        let a = a.as_f64x2();
6860        let b = b.as_f64x2();
6861        let c = c.as_i64x2();
6862        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6863        transmute(r)
6864    }
6865}
6866
6867/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6868///
6869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6870#[inline]
6871#[target_feature(enable = "avx512f,avx512vl")]
6872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6873#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6874#[rustc_legacy_const_generics(4)]
6875pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6876    a: __m128d,
6877    k: __mmask8,
6878    b: __m128d,
6879    c: __m128i,
6880) -> __m128d {
6881    unsafe {
6882        static_assert_uimm_bits!(IMM8, 8);
6883        let a = a.as_f64x2();
6884        let b = b.as_f64x2();
6885        let c = c.as_i64x2();
6886        let r = vfixupimmpd128(a, b, c, IMM8, k);
6887        transmute(r)
6888    }
6889}
6890
6891/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6892///
6893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6894#[inline]
6895#[target_feature(enable = "avx512f,avx512vl")]
6896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6897#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6898#[rustc_legacy_const_generics(4)]
6899pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6900    k: __mmask8,
6901    a: __m128d,
6902    b: __m128d,
6903    c: __m128i,
6904) -> __m128d {
6905    unsafe {
6906        static_assert_uimm_bits!(IMM8, 8);
6907        let a = a.as_f64x2();
6908        let b = b.as_f64x2();
6909        let c = c.as_i64x2();
6910        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6911        transmute(r)
6912    }
6913}
6914
6915/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6916///
6917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6918#[inline]
6919#[target_feature(enable = "avx512f")]
6920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6921#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6922#[rustc_legacy_const_generics(3)]
6923pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6924    unsafe {
6925        static_assert_uimm_bits!(IMM8, 8);
6926        let a = a.as_i32x16();
6927        let b = b.as_i32x16();
6928        let c = c.as_i32x16();
6929        let r = vpternlogd(a, b, c, IMM8);
6930        transmute(r)
6931    }
6932}
6933
6934/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6935///
6936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6937#[inline]
6938#[target_feature(enable = "avx512f")]
6939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6940#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6941#[rustc_legacy_const_generics(4)]
6942pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6943    src: __m512i,
6944    k: __mmask16,
6945    a: __m512i,
6946    b: __m512i,
6947) -> __m512i {
6948    unsafe {
6949        static_assert_uimm_bits!(IMM8, 8);
6950        let src = src.as_i32x16();
6951        let a = a.as_i32x16();
6952        let b = b.as_i32x16();
6953        let r = vpternlogd(src, a, b, IMM8);
6954        transmute(simd_select_bitmask(k, r, src))
6955    }
6956}
6957
6958/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6959///
6960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6961#[inline]
6962#[target_feature(enable = "avx512f")]
6963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6964#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6965#[rustc_legacy_const_generics(4)]
6966pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6967    k: __mmask16,
6968    a: __m512i,
6969    b: __m512i,
6970    c: __m512i,
6971) -> __m512i {
6972    unsafe {
6973        static_assert_uimm_bits!(IMM8, 8);
6974        let a = a.as_i32x16();
6975        let b = b.as_i32x16();
6976        let c = c.as_i32x16();
6977        let r = vpternlogd(a, b, c, IMM8);
6978        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6979    }
6980}
6981
6982/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6983///
6984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6985#[inline]
6986#[target_feature(enable = "avx512f,avx512vl")]
6987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6988#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6989#[rustc_legacy_const_generics(3)]
6990pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6991    unsafe {
6992        static_assert_uimm_bits!(IMM8, 8);
6993        let a = a.as_i32x8();
6994        let b = b.as_i32x8();
6995        let c = c.as_i32x8();
6996        let r = vpternlogd256(a, b, c, IMM8);
6997        transmute(r)
6998    }
6999}
7000
7001/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
7004#[inline]
7005#[target_feature(enable = "avx512f,avx512vl")]
7006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7007#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7008#[rustc_legacy_const_generics(4)]
7009pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
7010    src: __m256i,
7011    k: __mmask8,
7012    a: __m256i,
7013    b: __m256i,
7014) -> __m256i {
7015    unsafe {
7016        static_assert_uimm_bits!(IMM8, 8);
7017        let src = src.as_i32x8();
7018        let a = a.as_i32x8();
7019        let b = b.as_i32x8();
7020        let r = vpternlogd256(src, a, b, IMM8);
7021        transmute(simd_select_bitmask(k, r, src))
7022    }
7023}
7024
7025/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7026///
7027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
7028#[inline]
7029#[target_feature(enable = "avx512f,avx512vl")]
7030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7031#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7032#[rustc_legacy_const_generics(4)]
7033pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
7034    k: __mmask8,
7035    a: __m256i,
7036    b: __m256i,
7037    c: __m256i,
7038) -> __m256i {
7039    unsafe {
7040        static_assert_uimm_bits!(IMM8, 8);
7041        let a = a.as_i32x8();
7042        let b = b.as_i32x8();
7043        let c = c.as_i32x8();
7044        let r = vpternlogd256(a, b, c, IMM8);
7045        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
7046    }
7047}
7048
7049/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7050///
7051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
7052#[inline]
7053#[target_feature(enable = "avx512f,avx512vl")]
7054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7055#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7056#[rustc_legacy_const_generics(3)]
7057pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7058    unsafe {
7059        static_assert_uimm_bits!(IMM8, 8);
7060        let a = a.as_i32x4();
7061        let b = b.as_i32x4();
7062        let c = c.as_i32x4();
7063        let r = vpternlogd128(a, b, c, IMM8);
7064        transmute(r)
7065    }
7066}
7067
7068/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7069///
7070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
7071#[inline]
7072#[target_feature(enable = "avx512f,avx512vl")]
7073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7074#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7075#[rustc_legacy_const_generics(4)]
7076pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
7077    src: __m128i,
7078    k: __mmask8,
7079    a: __m128i,
7080    b: __m128i,
7081) -> __m128i {
7082    unsafe {
7083        static_assert_uimm_bits!(IMM8, 8);
7084        let src = src.as_i32x4();
7085        let a = a.as_i32x4();
7086        let b = b.as_i32x4();
7087        let r = vpternlogd128(src, a, b, IMM8);
7088        transmute(simd_select_bitmask(k, r, src))
7089    }
7090}
7091
7092/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7093///
7094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
7095#[inline]
7096#[target_feature(enable = "avx512f,avx512vl")]
7097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7098#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7099#[rustc_legacy_const_generics(4)]
7100pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
7101    k: __mmask8,
7102    a: __m128i,
7103    b: __m128i,
7104    c: __m128i,
7105) -> __m128i {
7106    unsafe {
7107        static_assert_uimm_bits!(IMM8, 8);
7108        let a = a.as_i32x4();
7109        let b = b.as_i32x4();
7110        let c = c.as_i32x4();
7111        let r = vpternlogd128(a, b, c, IMM8);
7112        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
7113    }
7114}
7115
7116/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7122#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7123#[rustc_legacy_const_generics(3)]
7124pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
7125    unsafe {
7126        static_assert_uimm_bits!(IMM8, 8);
7127        let a = a.as_i64x8();
7128        let b = b.as_i64x8();
7129        let c = c.as_i64x8();
7130        let r = vpternlogq(a, b, c, IMM8);
7131        transmute(r)
7132    }
7133}
7134
7135/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7136///
7137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
7138#[inline]
7139#[target_feature(enable = "avx512f")]
7140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7141#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7142#[rustc_legacy_const_generics(4)]
7143pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
7144    src: __m512i,
7145    k: __mmask8,
7146    a: __m512i,
7147    b: __m512i,
7148) -> __m512i {
7149    unsafe {
7150        static_assert_uimm_bits!(IMM8, 8);
7151        let src = src.as_i64x8();
7152        let a = a.as_i64x8();
7153        let b = b.as_i64x8();
7154        let r = vpternlogq(src, a, b, IMM8);
7155        transmute(simd_select_bitmask(k, r, src))
7156    }
7157}
7158
7159/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7160///
7161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
7162#[inline]
7163#[target_feature(enable = "avx512f")]
7164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7165#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7166#[rustc_legacy_const_generics(4)]
7167pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
7168    k: __mmask8,
7169    a: __m512i,
7170    b: __m512i,
7171    c: __m512i,
7172) -> __m512i {
7173    unsafe {
7174        static_assert_uimm_bits!(IMM8, 8);
7175        let a = a.as_i64x8();
7176        let b = b.as_i64x8();
7177        let c = c.as_i64x8();
7178        let r = vpternlogq(a, b, c, IMM8);
7179        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
7180    }
7181}
7182
7183/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7184///
7185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
7186#[inline]
7187#[target_feature(enable = "avx512f,avx512vl")]
7188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7189#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7190#[rustc_legacy_const_generics(3)]
7191pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
7192    unsafe {
7193        static_assert_uimm_bits!(IMM8, 8);
7194        let a = a.as_i64x4();
7195        let b = b.as_i64x4();
7196        let c = c.as_i64x4();
7197        let r = vpternlogq256(a, b, c, IMM8);
7198        transmute(r)
7199    }
7200}
7201
7202/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7203///
7204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
7205#[inline]
7206#[target_feature(enable = "avx512f,avx512vl")]
7207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7208#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7209#[rustc_legacy_const_generics(4)]
7210pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
7211    src: __m256i,
7212    k: __mmask8,
7213    a: __m256i,
7214    b: __m256i,
7215) -> __m256i {
7216    unsafe {
7217        static_assert_uimm_bits!(IMM8, 8);
7218        let src = src.as_i64x4();
7219        let a = a.as_i64x4();
7220        let b = b.as_i64x4();
7221        let r = vpternlogq256(src, a, b, IMM8);
7222        transmute(simd_select_bitmask(k, r, src))
7223    }
7224}
7225
7226/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
7229#[inline]
7230#[target_feature(enable = "avx512f,avx512vl")]
7231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7232#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7233#[rustc_legacy_const_generics(4)]
7234pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
7235    k: __mmask8,
7236    a: __m256i,
7237    b: __m256i,
7238    c: __m256i,
7239) -> __m256i {
7240    unsafe {
7241        static_assert_uimm_bits!(IMM8, 8);
7242        let a = a.as_i64x4();
7243        let b = b.as_i64x4();
7244        let c = c.as_i64x4();
7245        let r = vpternlogq256(a, b, c, IMM8);
7246        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
7247    }
7248}
7249
7250/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7251///
7252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
7253#[inline]
7254#[target_feature(enable = "avx512f,avx512vl")]
7255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7256#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7257#[rustc_legacy_const_generics(3)]
7258pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7259    unsafe {
7260        static_assert_uimm_bits!(IMM8, 8);
7261        let a = a.as_i64x2();
7262        let b = b.as_i64x2();
7263        let c = c.as_i64x2();
7264        let r = vpternlogq128(a, b, c, IMM8);
7265        transmute(r)
7266    }
7267}
7268
7269/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7270///
7271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
7272#[inline]
7273#[target_feature(enable = "avx512f,avx512vl")]
7274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7275#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7276#[rustc_legacy_const_generics(4)]
7277pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
7278    src: __m128i,
7279    k: __mmask8,
7280    a: __m128i,
7281    b: __m128i,
7282) -> __m128i {
7283    unsafe {
7284        static_assert_uimm_bits!(IMM8, 8);
7285        let src = src.as_i64x2();
7286        let a = a.as_i64x2();
7287        let b = b.as_i64x2();
7288        let r = vpternlogq128(src, a, b, IMM8);
7289        transmute(simd_select_bitmask(k, r, src))
7290    }
7291}
7292
7293/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7294///
7295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7296#[inline]
7297#[target_feature(enable = "avx512f,avx512vl")]
7298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7299#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7300#[rustc_legacy_const_generics(4)]
7301pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7302    k: __mmask8,
7303    a: __m128i,
7304    b: __m128i,
7305    c: __m128i,
7306) -> __m128i {
7307    unsafe {
7308        static_assert_uimm_bits!(IMM8, 8);
7309        let a = a.as_i64x2();
7310        let b = b.as_i64x2();
7311        let c = c.as_i64x2();
7312        let r = vpternlogq128(a, b, c, IMM8);
7313        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7314    }
7315}
7316
7317/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7318/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7319///    _MM_MANT_NORM_1_2     // interval [1, 2)
7320///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)
7321///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)
7322///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7323/// The sign is determined by sc which can take the following values:
7324///    _MM_MANT_SIGN_src     // sign = sign(src)
7325///    _MM_MANT_SIGN_zero    // sign = 0
7326///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7327///
7328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7329#[inline]
7330#[target_feature(enable = "avx512f")]
7331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7332#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7333#[rustc_legacy_const_generics(1, 2)]
7334pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7335    a: __m512,
7336) -> __m512 {
7337    unsafe {
7338        static_assert_uimm_bits!(NORM, 4);
7339        static_assert_uimm_bits!(SIGN, 2);
7340        let a = a.as_f32x16();
7341        let zero = f32x16::ZERO;
7342        let r = vgetmantps(
7343            a,
7344            SIGN << 2 | NORM,
7345            zero,
7346            0b11111111_11111111,
7347            _MM_FROUND_CUR_DIRECTION,
7348        );
7349        transmute(r)
7350    }
7351}
7352
7353/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7355///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7356///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7357///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7358///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7359/// The sign is determined by sc which can take the following values:\
7360///    _MM_MANT_SIGN_src     // sign = sign(src)\
7361///    _MM_MANT_SIGN_zero    // sign = 0\
7362///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7363///
7364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7365#[inline]
7366#[target_feature(enable = "avx512f")]
7367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7368#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7369#[rustc_legacy_const_generics(3, 4)]
7370pub fn _mm512_mask_getmant_ps<
7371    const NORM: _MM_MANTISSA_NORM_ENUM,
7372    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7373>(
7374    src: __m512,
7375    k: __mmask16,
7376    a: __m512,
7377) -> __m512 {
7378    unsafe {
7379        static_assert_uimm_bits!(NORM, 4);
7380        static_assert_uimm_bits!(SIGN, 2);
7381        let a = a.as_f32x16();
7382        let src = src.as_f32x16();
7383        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7384        transmute(r)
7385    }
7386}
7387
7388/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7389/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7390///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7391///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7392///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7393///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7394/// The sign is determined by sc which can take the following values:\
7395///    _MM_MANT_SIGN_src     // sign = sign(src)\
7396///    _MM_MANT_SIGN_zero    // sign = 0\
7397///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7398///
7399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7400#[inline]
7401#[target_feature(enable = "avx512f")]
7402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7403#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7404#[rustc_legacy_const_generics(2, 3)]
7405pub fn _mm512_maskz_getmant_ps<
7406    const NORM: _MM_MANTISSA_NORM_ENUM,
7407    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7408>(
7409    k: __mmask16,
7410    a: __m512,
7411) -> __m512 {
7412    unsafe {
7413        static_assert_uimm_bits!(NORM, 4);
7414        static_assert_uimm_bits!(SIGN, 2);
7415        let a = a.as_f32x16();
7416        let r = vgetmantps(
7417            a,
7418            SIGN << 2 | NORM,
7419            f32x16::ZERO,
7420            k,
7421            _MM_FROUND_CUR_DIRECTION,
7422        );
7423        transmute(r)
7424    }
7425}
7426
7427/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7428/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7429///    _MM_MANT_NORM_1_2     // interval [1, 2)
7430///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)
7431///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)
7432///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7433/// The sign is determined by sc which can take the following values:
7434///    _MM_MANT_SIGN_src     // sign = sign(src)
7435///    _MM_MANT_SIGN_zero    // sign = 0
7436///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7437///
7438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7439#[inline]
7440#[target_feature(enable = "avx512f,avx512vl")]
7441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7442#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7443#[rustc_legacy_const_generics(1, 2)]
7444pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7445    a: __m256,
7446) -> __m256 {
7447    unsafe {
7448        static_assert_uimm_bits!(NORM, 4);
7449        static_assert_uimm_bits!(SIGN, 2);
7450        let a = a.as_f32x8();
7451        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
7452        transmute(r)
7453    }
7454}
7455
7456/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7457/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7458///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7459///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7460///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7461///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7462/// The sign is determined by sc which can take the following values:\
7463///    _MM_MANT_SIGN_src     // sign = sign(src)\
7464///    _MM_MANT_SIGN_zero    // sign = 0\
7465///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7466///
7467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7468#[inline]
7469#[target_feature(enable = "avx512f,avx512vl")]
7470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7471#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7472#[rustc_legacy_const_generics(3, 4)]
7473pub fn _mm256_mask_getmant_ps<
7474    const NORM: _MM_MANTISSA_NORM_ENUM,
7475    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7476>(
7477    src: __m256,
7478    k: __mmask8,
7479    a: __m256,
7480) -> __m256 {
7481    unsafe {
7482        static_assert_uimm_bits!(NORM, 4);
7483        static_assert_uimm_bits!(SIGN, 2);
7484        let a = a.as_f32x8();
7485        let src = src.as_f32x8();
7486        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
7487        transmute(r)
7488    }
7489}
7490
7491/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7492/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7493///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7494///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7495///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7496///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7497/// The sign is determined by sc which can take the following values:\
7498///    _MM_MANT_SIGN_src     // sign = sign(src)\
7499///    _MM_MANT_SIGN_zero    // sign = 0\
7500///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7501///
7502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7503#[inline]
7504#[target_feature(enable = "avx512f,avx512vl")]
7505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7506#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7507#[rustc_legacy_const_generics(2, 3)]
7508pub fn _mm256_maskz_getmant_ps<
7509    const NORM: _MM_MANTISSA_NORM_ENUM,
7510    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7511>(
7512    k: __mmask8,
7513    a: __m256,
7514) -> __m256 {
7515    unsafe {
7516        static_assert_uimm_bits!(NORM, 4);
7517        static_assert_uimm_bits!(SIGN, 2);
7518        let a = a.as_f32x8();
7519        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
7520        transmute(r)
7521    }
7522}
7523
7524/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7525/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7526///    _MM_MANT_NORM_1_2     // interval [1, 2)
7527///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)
7528///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)
7529///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7530/// The sign is determined by sc which can take the following values:
7531///    _MM_MANT_SIGN_src     // sign = sign(src)
7532///    _MM_MANT_SIGN_zero    // sign = 0
7533///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7534///
7535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7536#[inline]
7537#[target_feature(enable = "avx512f,avx512vl")]
7538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7539#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7540#[rustc_legacy_const_generics(1, 2)]
7541pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7542    a: __m128,
7543) -> __m128 {
7544    unsafe {
7545        static_assert_uimm_bits!(NORM, 4);
7546        static_assert_uimm_bits!(SIGN, 2);
7547        let a = a.as_f32x4();
7548        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
7549        transmute(r)
7550    }
7551}
7552
7553/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7554/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7555///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7556///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7557///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7558///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7559/// The sign is determined by sc which can take the following values:\
7560///    _MM_MANT_SIGN_src     // sign = sign(src)\
7561///    _MM_MANT_SIGN_zero    // sign = 0\
7562///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7563///
7564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7565#[inline]
7566#[target_feature(enable = "avx512f,avx512vl")]
7567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7568#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7569#[rustc_legacy_const_generics(3, 4)]
7570pub fn _mm_mask_getmant_ps<
7571    const NORM: _MM_MANTISSA_NORM_ENUM,
7572    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7573>(
7574    src: __m128,
7575    k: __mmask8,
7576    a: __m128,
7577) -> __m128 {
7578    unsafe {
7579        static_assert_uimm_bits!(NORM, 4);
7580        static_assert_uimm_bits!(SIGN, 2);
7581        let a = a.as_f32x4();
7582        let src = src.as_f32x4();
7583        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
7584        transmute(r)
7585    }
7586}
7587
7588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7594/// The sign is determined by sc which can take the following values:\
7595///    _MM_MANT_SIGN_src     // sign = sign(src)\
7596///    _MM_MANT_SIGN_zero    // sign = 0\
7597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7598///
7599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7600#[inline]
7601#[target_feature(enable = "avx512f,avx512vl")]
7602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7603#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7604#[rustc_legacy_const_generics(2, 3)]
7605pub fn _mm_maskz_getmant_ps<
7606    const NORM: _MM_MANTISSA_NORM_ENUM,
7607    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7608>(
7609    k: __mmask8,
7610    a: __m128,
7611) -> __m128 {
7612    unsafe {
7613        static_assert_uimm_bits!(NORM, 4);
7614        static_assert_uimm_bits!(SIGN, 2);
7615        let a = a.as_f32x4();
7616        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
7617        transmute(r)
7618    }
7619}
7620
7621/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7622/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7623///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7624///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7625///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7626///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7627/// The sign is determined by sc which can take the following values:\
7628///    _MM_MANT_SIGN_src     // sign = sign(src)\
7629///    _MM_MANT_SIGN_zero    // sign = 0\
7630///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7631///
7632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7633#[inline]
7634#[target_feature(enable = "avx512f")]
7635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7636#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7637#[rustc_legacy_const_generics(1, 2)]
7638pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7639    a: __m512d,
7640) -> __m512d {
7641    unsafe {
7642        static_assert_uimm_bits!(NORM, 4);
7643        static_assert_uimm_bits!(SIGN, 2);
7644        let a = a.as_f64x8();
7645        let zero = f64x8::ZERO;
7646        let r = vgetmantpd(
7647            a,
7648            SIGN << 2 | NORM,
7649            zero,
7650            0b11111111,
7651            _MM_FROUND_CUR_DIRECTION,
7652        );
7653        transmute(r)
7654    }
7655}
7656
7657/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7658/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7659///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7660///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7661///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7662///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7663/// The sign is determined by sc which can take the following values:\
7664///    _MM_MANT_SIGN_src     // sign = sign(src)\
7665///    _MM_MANT_SIGN_zero    // sign = 0\
7666///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7667///
7668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7669#[inline]
7670#[target_feature(enable = "avx512f")]
7671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7672#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7673#[rustc_legacy_const_generics(3, 4)]
7674pub fn _mm512_mask_getmant_pd<
7675    const NORM: _MM_MANTISSA_NORM_ENUM,
7676    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7677>(
7678    src: __m512d,
7679    k: __mmask8,
7680    a: __m512d,
7681) -> __m512d {
7682    unsafe {
7683        static_assert_uimm_bits!(NORM, 4);
7684        static_assert_uimm_bits!(SIGN, 2);
7685        let a = a.as_f64x8();
7686        let src = src.as_f64x8();
7687        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7688        transmute(r)
7689    }
7690}
7691
7692/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7693/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7694///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7695///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7696///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7697///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7698/// The sign is determined by sc which can take the following values:\
7699///    _MM_MANT_SIGN_src     // sign = sign(src)\
7700///    _MM_MANT_SIGN_zero    // sign = 0\
7701///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7702///
7703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7704#[inline]
7705#[target_feature(enable = "avx512f")]
7706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7707#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7708#[rustc_legacy_const_generics(2, 3)]
7709pub fn _mm512_maskz_getmant_pd<
7710    const NORM: _MM_MANTISSA_NORM_ENUM,
7711    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7712>(
7713    k: __mmask8,
7714    a: __m512d,
7715) -> __m512d {
7716    unsafe {
7717        static_assert_uimm_bits!(NORM, 4);
7718        static_assert_uimm_bits!(SIGN, 2);
7719        let a = a.as_f64x8();
7720        let r = vgetmantpd(
7721            a,
7722            SIGN << 2 | NORM,
7723            f64x8::ZERO,
7724            k,
7725            _MM_FROUND_CUR_DIRECTION,
7726        );
7727        transmute(r)
7728    }
7729}
7730
7731/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7732/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7733///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7734///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7735///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7736///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7737/// The sign is determined by sc which can take the following values:\
7738///    _MM_MANT_SIGN_src     // sign = sign(src)\
7739///    _MM_MANT_SIGN_zero    // sign = 0\
7740///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7741///
7742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7743#[inline]
7744#[target_feature(enable = "avx512f,avx512vl")]
7745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7746#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7747#[rustc_legacy_const_generics(1, 2)]
7748pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7749    a: __m256d,
7750) -> __m256d {
7751    unsafe {
7752        static_assert_uimm_bits!(NORM, 4);
7753        static_assert_uimm_bits!(SIGN, 2);
7754        let a = a.as_f64x4();
7755        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7756        transmute(r)
7757    }
7758}
7759
7760/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7761/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7762///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7763///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7764///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7765///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7766/// The sign is determined by sc which can take the following values:\
7767///    _MM_MANT_SIGN_src     // sign = sign(src)\
7768///    _MM_MANT_SIGN_zero    // sign = 0\
7769///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7770///
7771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7772#[inline]
7773#[target_feature(enable = "avx512f,avx512vl")]
7774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7775#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7776#[rustc_legacy_const_generics(3, 4)]
7777pub fn _mm256_mask_getmant_pd<
7778    const NORM: _MM_MANTISSA_NORM_ENUM,
7779    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7780>(
7781    src: __m256d,
7782    k: __mmask8,
7783    a: __m256d,
7784) -> __m256d {
7785    unsafe {
7786        static_assert_uimm_bits!(NORM, 4);
7787        static_assert_uimm_bits!(SIGN, 2);
7788        let a = a.as_f64x4();
7789        let src = src.as_f64x4();
7790        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7791        transmute(r)
7792    }
7793}
7794
7795/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7796/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7797///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7798///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7799///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7800///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7801/// The sign is determined by sc which can take the following values:\
7802///    _MM_MANT_SIGN_src     // sign = sign(src)\
7803///    _MM_MANT_SIGN_zero    // sign = 0\
7804///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7805///
7806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7807#[inline]
7808#[target_feature(enable = "avx512f,avx512vl")]
7809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7810#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7811#[rustc_legacy_const_generics(2, 3)]
7812pub fn _mm256_maskz_getmant_pd<
7813    const NORM: _MM_MANTISSA_NORM_ENUM,
7814    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7815>(
7816    k: __mmask8,
7817    a: __m256d,
7818) -> __m256d {
7819    unsafe {
7820        static_assert_uimm_bits!(NORM, 4);
7821        static_assert_uimm_bits!(SIGN, 2);
7822        let a = a.as_f64x4();
7823        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7824        transmute(r)
7825    }
7826}
7827
7828/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7829/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7830///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7831///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7832///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7833///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7834/// The sign is determined by sc which can take the following values:\
7835///    _MM_MANT_SIGN_src     // sign = sign(src)\
7836///    _MM_MANT_SIGN_zero    // sign = 0\
7837///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7838///
7839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7840#[inline]
7841#[target_feature(enable = "avx512f,avx512vl")]
7842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7843#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7844#[rustc_legacy_const_generics(1, 2)]
7845pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7846    a: __m128d,
7847) -> __m128d {
7848    unsafe {
7849        static_assert_uimm_bits!(NORM, 4);
7850        static_assert_uimm_bits!(SIGN, 2);
7851        let a = a.as_f64x2();
7852        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7853        transmute(r)
7854    }
7855}
7856
7857/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7859///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7860///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7861///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7862///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7863/// The sign is determined by sc which can take the following values:\
7864///    _MM_MANT_SIGN_src     // sign = sign(src)\
7865///    _MM_MANT_SIGN_zero    // sign = 0\
7866///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7867///
7868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7869#[inline]
7870#[target_feature(enable = "avx512f,avx512vl")]
7871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7872#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7873#[rustc_legacy_const_generics(3, 4)]
7874pub fn _mm_mask_getmant_pd<
7875    const NORM: _MM_MANTISSA_NORM_ENUM,
7876    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7877>(
7878    src: __m128d,
7879    k: __mmask8,
7880    a: __m128d,
7881) -> __m128d {
7882    unsafe {
7883        static_assert_uimm_bits!(NORM, 4);
7884        static_assert_uimm_bits!(SIGN, 2);
7885        let a = a.as_f64x2();
7886        let src = src.as_f64x2();
7887        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7888        transmute(r)
7889    }
7890}
7891
7892/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7893/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7894///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7895///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7896///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7897///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7898/// The sign is determined by sc which can take the following values:\
7899///    _MM_MANT_SIGN_src     // sign = sign(src)\
7900///    _MM_MANT_SIGN_zero    // sign = 0\
7901///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7902///
7903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7904#[inline]
7905#[target_feature(enable = "avx512f,avx512vl")]
7906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7907#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7908#[rustc_legacy_const_generics(2, 3)]
7909pub fn _mm_maskz_getmant_pd<
7910    const NORM: _MM_MANTISSA_NORM_ENUM,
7911    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7912>(
7913    k: __mmask8,
7914    a: __m128d,
7915) -> __m128d {
7916    unsafe {
7917        static_assert_uimm_bits!(NORM, 4);
7918        static_assert_uimm_bits!(SIGN, 2);
7919        let a = a.as_f64x2();
7920        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7921        transmute(r)
7922    }
7923}
7924
7925/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7926///
7927/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7933///
7934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7935#[inline]
7936#[target_feature(enable = "avx512f")]
7937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7938#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7939#[rustc_legacy_const_generics(2)]
7940pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7941    unsafe {
7942        static_assert_rounding!(ROUNDING);
7943        let a = a.as_f32x16();
7944        let b = b.as_f32x16();
7945        let r = vaddps(a, b, ROUNDING);
7946        transmute(r)
7947    }
7948}
7949
7950/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7951///
7952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7958///
7959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7960#[inline]
7961#[target_feature(enable = "avx512f")]
7962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7963#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7964#[rustc_legacy_const_generics(4)]
7965pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7966    src: __m512,
7967    k: __mmask16,
7968    a: __m512,
7969    b: __m512,
7970) -> __m512 {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f32x16();
7974        let b = b.as_f32x16();
7975        let r = vaddps(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7977    }
7978}
7979
7980/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(3)]
7995pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7996    k: __mmask16,
7997    a: __m512,
7998    b: __m512,
7999) -> __m512 {
8000    unsafe {
8001        static_assert_rounding!(ROUNDING);
8002        let a = a.as_f32x16();
8003        let b = b.as_f32x16();
8004        let r = vaddps(a, b, ROUNDING);
8005        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8006    }
8007}
8008
8009/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8010///
8011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8017///
8018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
8019#[inline]
8020#[target_feature(enable = "avx512f")]
8021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8022#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8023#[rustc_legacy_const_generics(2)]
8024pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8025    unsafe {
8026        static_assert_rounding!(ROUNDING);
8027        let a = a.as_f64x8();
8028        let b = b.as_f64x8();
8029        let r = vaddpd(a, b, ROUNDING);
8030        transmute(r)
8031    }
8032}
8033
8034/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8035///
8036/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8037/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8038/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8039/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8040/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8041/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8042///
8043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
8044#[inline]
8045#[target_feature(enable = "avx512f")]
8046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8047#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8048#[rustc_legacy_const_generics(4)]
8049pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
8050    src: __m512d,
8051    k: __mmask8,
8052    a: __m512d,
8053    b: __m512d,
8054) -> __m512d {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f64x8();
8058        let b = b.as_f64x8();
8059        let r = vaddpd(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8061    }
8062}
8063
8064/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(3)]
8079pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
8080    k: __mmask8,
8081    a: __m512d,
8082    b: __m512d,
8083) -> __m512d {
8084    unsafe {
8085        static_assert_rounding!(ROUNDING);
8086        let a = a.as_f64x8();
8087        let b = b.as_f64x8();
8088        let r = vaddpd(a, b, ROUNDING);
8089        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8090    }
8091}
8092
8093/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8094///
8095/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8096/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8097/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8098/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8099/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8101///
8102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
8103#[inline]
8104#[target_feature(enable = "avx512f")]
8105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8106#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8107#[rustc_legacy_const_generics(2)]
8108pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8109    unsafe {
8110        static_assert_rounding!(ROUNDING);
8111        let a = a.as_f32x16();
8112        let b = b.as_f32x16();
8113        let r = vsubps(a, b, ROUNDING);
8114        transmute(r)
8115    }
8116}
8117
8118/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8119///
8120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8126///
8127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
8128#[inline]
8129#[target_feature(enable = "avx512f")]
8130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8131#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8132#[rustc_legacy_const_generics(4)]
8133pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
8134    src: __m512,
8135    k: __mmask16,
8136    a: __m512,
8137    b: __m512,
8138) -> __m512 {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f32x16();
8142        let b = b.as_f32x16();
8143        let r = vsubps(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8145    }
8146}
8147
8148/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(3)]
8163pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
8164    k: __mmask16,
8165    a: __m512,
8166    b: __m512,
8167) -> __m512 {
8168    unsafe {
8169        static_assert_rounding!(ROUNDING);
8170        let a = a.as_f32x16();
8171        let b = b.as_f32x16();
8172        let r = vsubps(a, b, ROUNDING);
8173        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8174    }
8175}
8176
8177/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8178///
8179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8185///
8186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
8187#[inline]
8188#[target_feature(enable = "avx512f")]
8189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8190#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8191#[rustc_legacy_const_generics(2)]
8192pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8193    unsafe {
8194        static_assert_rounding!(ROUNDING);
8195        let a = a.as_f64x8();
8196        let b = b.as_f64x8();
8197        let r = vsubpd(a, b, ROUNDING);
8198        transmute(r)
8199    }
8200}
8201
8202/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8203///
8204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8210///
8211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
8212#[inline]
8213#[target_feature(enable = "avx512f")]
8214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8215#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8216#[rustc_legacy_const_generics(4)]
8217pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
8218    src: __m512d,
8219    k: __mmask8,
8220    a: __m512d,
8221    b: __m512d,
8222) -> __m512d {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f64x8();
8226        let b = b.as_f64x8();
8227        let r = vsubpd(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8229    }
8230}
8231
8232/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(3)]
8247pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
8248    k: __mmask8,
8249    a: __m512d,
8250    b: __m512d,
8251) -> __m512d {
8252    unsafe {
8253        static_assert_rounding!(ROUNDING);
8254        let a = a.as_f64x8();
8255        let b = b.as_f64x8();
8256        let r = vsubpd(a, b, ROUNDING);
8257        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8258    }
8259}
8260
8261/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
8262///
8263/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8264/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8265/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8266/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8267/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8269///
8270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
8271#[inline]
8272#[target_feature(enable = "avx512f")]
8273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8274#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8275#[rustc_legacy_const_generics(2)]
8276pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8277    unsafe {
8278        static_assert_rounding!(ROUNDING);
8279        let a = a.as_f32x16();
8280        let b = b.as_f32x16();
8281        let r = vmulps(a, b, ROUNDING);
8282        transmute(r)
8283    }
8284}
8285
8286/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8287///
8288/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8289/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8290/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8291/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8292/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8293/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8296#[inline]
8297#[target_feature(enable = "avx512f")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8300#[rustc_legacy_const_generics(4)]
8301pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8302    src: __m512,
8303    k: __mmask16,
8304    a: __m512,
8305    b: __m512,
8306) -> __m512 {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f32x16();
8310        let b = b.as_f32x16();
8311        let r = vmulps(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8313    }
8314}
8315
8316/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(3)]
8331pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8332    k: __mmask16,
8333    a: __m512,
8334    b: __m512,
8335) -> __m512 {
8336    unsafe {
8337        static_assert_rounding!(ROUNDING);
8338        let a = a.as_f32x16();
8339        let b = b.as_f32x16();
8340        let r = vmulps(a, b, ROUNDING);
8341        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8342    }
8343}
8344
8345/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8346///
8347/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8353///
8354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8355#[inline]
8356#[target_feature(enable = "avx512f")]
8357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8358#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8359#[rustc_legacy_const_generics(2)]
8360pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8361    unsafe {
8362        static_assert_rounding!(ROUNDING);
8363        let a = a.as_f64x8();
8364        let b = b.as_f64x8();
8365        let r = vmulpd(a, b, ROUNDING);
8366        transmute(r)
8367    }
8368}
8369
8370/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8371///
8372/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8373/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8374/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8375/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8376/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8378///
8379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8380#[inline]
8381#[target_feature(enable = "avx512f")]
8382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8383#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8384#[rustc_legacy_const_generics(4)]
8385pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8386    src: __m512d,
8387    k: __mmask8,
8388    a: __m512d,
8389    b: __m512d,
8390) -> __m512d {
8391    unsafe {
8392        static_assert_rounding!(ROUNDING);
8393        let a = a.as_f64x8();
8394        let b = b.as_f64x8();
8395        let r = vmulpd(a, b, ROUNDING);
8396        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8397    }
8398}
8399
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8401///
8402/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8403/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8404/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8405/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8406/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8408///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3936)
8410#[inline]
8411#[target_feature(enable = "avx512f")]
8412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8413#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8414#[rustc_legacy_const_generics(3)]
8415pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8416    k: __mmask8,
8417    a: __m512d,
8418    b: __m512d,
8419) -> __m512d {
8420    unsafe {
8421        static_assert_rounding!(ROUNDING);
8422        let a = a.as_f64x8();
8423        let b = b.as_f64x8();
8424        let r = vmulpd(a, b, ROUNDING);
8425        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8426    }
8427}
8428
8429/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8430///
8431/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8437///
8438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8439#[inline]
8440#[target_feature(enable = "avx512f")]
8441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8442#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8443#[rustc_legacy_const_generics(2)]
8444pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8445    unsafe {
8446        static_assert_rounding!(ROUNDING);
8447        let a = a.as_f32x16();
8448        let b = b.as_f32x16();
8449        let r = vdivps(a, b, ROUNDING);
8450        transmute(r)
8451    }
8452}
8453
8454/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8455///
8456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8462///
8463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8464#[inline]
8465#[target_feature(enable = "avx512f")]
8466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8467#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8468#[rustc_legacy_const_generics(4)]
8469pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8470    src: __m512,
8471    k: __mmask16,
8472    a: __m512,
8473    b: __m512,
8474) -> __m512 {
8475    unsafe {
8476        static_assert_rounding!(ROUNDING);
8477        let a = a.as_f32x16();
8478        let b = b.as_f32x16();
8479        let r = vdivps(a, b, ROUNDING);
8480        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8481    }
8482}
8483
8484/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8485///
8486/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8487/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8488/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8489/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8490/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8491/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8492///
8493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8494#[inline]
8495#[target_feature(enable = "avx512f")]
8496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8497#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8498#[rustc_legacy_const_generics(3)]
8499pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8500    k: __mmask16,
8501    a: __m512,
8502    b: __m512,
8503) -> __m512 {
8504    unsafe {
8505        static_assert_rounding!(ROUNDING);
8506        let a = a.as_f32x16();
8507        let b = b.as_f32x16();
8508        let r = vdivps(a, b, ROUNDING);
8509        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8510    }
8511}
8512
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8514///
8515/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8516/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8517/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8518/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8519/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8520/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8521///
8522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8523#[inline]
8524#[target_feature(enable = "avx512f")]
8525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8526#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8527#[rustc_legacy_const_generics(2)]
8528pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8529    unsafe {
8530        static_assert_rounding!(ROUNDING);
8531        let a = a.as_f64x8();
8532        let b = b.as_f64x8();
8533        let r = vdivpd(a, b, ROUNDING);
8534        transmute(r)
8535    }
8536}
8537
8538/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8539///
8540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8548#[inline]
8549#[target_feature(enable = "avx512f")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8552#[rustc_legacy_const_generics(4)]
8553pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8554    src: __m512d,
8555    k: __mmask8,
8556    a: __m512d,
8557    b: __m512d,
8558) -> __m512d {
8559    unsafe {
8560        static_assert_rounding!(ROUNDING);
8561        let a = a.as_f64x8();
8562        let b = b.as_f64x8();
8563        let r = vdivpd(a, b, ROUNDING);
8564        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8565    }
8566}
8567
8568/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8569///
8570/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8571/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8572/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8573/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8574/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8575/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8576///
8577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8578#[inline]
8579#[target_feature(enable = "avx512f")]
8580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8581#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8582#[rustc_legacy_const_generics(3)]
8583pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8584    k: __mmask8,
8585    a: __m512d,
8586    b: __m512d,
8587) -> __m512d {
8588    unsafe {
8589        static_assert_rounding!(ROUNDING);
8590        let a = a.as_f64x8();
8591        let b = b.as_f64x8();
8592        let r = vdivpd(a, b, ROUNDING);
8593        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8594    }
8595}
8596
8597/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8598///
8599/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8600/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8601/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8602/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8603/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8604/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8605///
8606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8607#[inline]
8608#[target_feature(enable = "avx512f")]
8609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8610#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8611#[rustc_legacy_const_generics(1)]
8612pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8613    unsafe {
8614        static_assert_rounding!(ROUNDING);
8615        let a = a.as_f32x16();
8616        let r = vsqrtps(a, ROUNDING);
8617        transmute(r)
8618    }
8619}
8620
8621/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8622///
8623/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8624/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8625/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8626/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8627/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8628/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8629///
8630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8631#[inline]
8632#[target_feature(enable = "avx512f")]
8633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8634#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8635#[rustc_legacy_const_generics(3)]
8636pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8637    src: __m512,
8638    k: __mmask16,
8639    a: __m512,
8640) -> __m512 {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        let a = a.as_f32x16();
8644        let r = vsqrtps(a, ROUNDING);
8645        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8646    }
8647}
8648
8649/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8650///
8651/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8652/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8653/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8654/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8655/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8656/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8657///
8658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8659#[inline]
8660#[target_feature(enable = "avx512f")]
8661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8662#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8663#[rustc_legacy_const_generics(2)]
8664pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8665    unsafe {
8666        static_assert_rounding!(ROUNDING);
8667        let a = a.as_f32x16();
8668        let r = vsqrtps(a, ROUNDING);
8669        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8670    }
8671}
8672
8673/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8674///
8675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8681///
8682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8683#[inline]
8684#[target_feature(enable = "avx512f")]
8685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8686#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8687#[rustc_legacy_const_generics(1)]
8688pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8689    unsafe {
8690        static_assert_rounding!(ROUNDING);
8691        let a = a.as_f64x8();
8692        let r = vsqrtpd(a, ROUNDING);
8693        transmute(r)
8694    }
8695}
8696
8697/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8698///
8699/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8705///
8706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8707#[inline]
8708#[target_feature(enable = "avx512f")]
8709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8710#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8711#[rustc_legacy_const_generics(3)]
8712pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8713    src: __m512d,
8714    k: __mmask8,
8715    a: __m512d,
8716) -> __m512d {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let a = a.as_f64x8();
8720        let r = vsqrtpd(a, ROUNDING);
8721        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8722    }
8723}
8724
8725/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8726///
8727/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8728/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8729/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8730/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8731/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8732/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8733///
8734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8735#[inline]
8736#[target_feature(enable = "avx512f")]
8737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8738#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8739#[rustc_legacy_const_generics(2)]
8740pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8741    unsafe {
8742        static_assert_rounding!(ROUNDING);
8743        let a = a.as_f64x8();
8744        let r = vsqrtpd(a, ROUNDING);
8745        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8746    }
8747}
8748
8749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8750///
8751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8757///
8758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
8759#[inline]
8760#[target_feature(enable = "avx512f")]
8761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8762#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8763#[rustc_legacy_const_generics(3)]
8764pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8765    unsafe {
8766        static_assert_rounding!(ROUNDING);
8767        vfmadd132psround(a, b, c, ROUNDING)
8768    }
8769}
8770
8771/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8772///
8773/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8774/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8775/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8776/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8777/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8778/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8779///
8780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8781#[inline]
8782#[target_feature(enable = "avx512f")]
8783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8784#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8785#[rustc_legacy_const_generics(4)]
8786pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8787    a: __m512,
8788    k: __mmask16,
8789    b: __m512,
8790    c: __m512,
8791) -> __m512 {
8792    unsafe {
8793        static_assert_rounding!(ROUNDING);
8794        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8795    }
8796}
8797
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // Zeromask variant: lanes with a clear mask bit take 0.0 instead of an input lane.
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
    }
}
8824
8825/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8826///
8827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8833///
8834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
8835#[inline]
8836#[target_feature(enable = "avx512f")]
8837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8839#[rustc_legacy_const_generics(4)]
8840pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8841    a: __m512,
8842    b: __m512,
8843    c: __m512,
8844    k: __mmask16,
8845) -> __m512 {
8846    unsafe {
8847        static_assert_rounding!(ROUNDING);
8848        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8849    }
8850}
8851
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        // ROUNDING is rejected at compile time unless it is a valid rounding immediate.
        static_assert_rounding!(ROUNDING);
        // Single fused multiply-add across all 8 lanes with an explicit rounding mode.
        vfmadd132pdround(a, b, c, ROUNDING)
    }
}
8873
8874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8875///
8876/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8877/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8878/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8879/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8880/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8881/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8882///
8883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8884#[inline]
8885#[target_feature(enable = "avx512f")]
8886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8887#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8888#[rustc_legacy_const_generics(4)]
8889pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8890    a: __m512d,
8891    k: __mmask8,
8892    b: __m512d,
8893    c: __m512d,
8894) -> __m512d {
8895    unsafe {
8896        static_assert_rounding!(ROUNDING);
8897        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8898    }
8899}
8900
8901/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8902///
8903/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8909///
8910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8911#[inline]
8912#[target_feature(enable = "avx512f")]
8913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8914#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8915#[rustc_legacy_const_generics(4)]
8916pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8917    k: __mmask8,
8918    a: __m512d,
8919    b: __m512d,
8920    c: __m512d,
8921) -> __m512d {
8922    unsafe {
8923        static_assert_rounding!(ROUNDING);
8924        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8925    }
8926}
8927
8928/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8929///
8930/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8931/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8932/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8933/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8934/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8935/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8936///
8937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8938#[inline]
8939#[target_feature(enable = "avx512f")]
8940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8941#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8942#[rustc_legacy_const_generics(4)]
8943pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8944    a: __m512d,
8945    b: __m512d,
8946    c: __m512d,
8947    k: __mmask8,
8948) -> __m512d {
8949    unsafe {
8950        static_assert_rounding!(ROUNDING);
8951        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8952    }
8953}
8954
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fmsub(a, b, c) is expressed as fmadd(a, b, -c); the backend folds the
        // negation into the fused-multiply-subtract instruction.
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
    }
}
8976
8977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8978///
8979/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8980/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8981/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8982/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8983/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8984/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8985///
8986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8987#[inline]
8988#[target_feature(enable = "avx512f")]
8989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8990#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8991#[rustc_legacy_const_generics(4)]
8992pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8993    a: __m512,
8994    k: __mmask16,
8995    b: __m512,
8996    c: __m512,
8997) -> __m512 {
8998    unsafe {
8999        static_assert_rounding!(ROUNDING);
9000        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
9001        simd_select_bitmask(k, r, a)
9002    }
9003}
9004
9005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9006///
9007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9013///
9014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
9015#[inline]
9016#[target_feature(enable = "avx512f")]
9017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9019#[rustc_legacy_const_generics(4)]
9020pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
9021    k: __mmask16,
9022    a: __m512,
9023    b: __m512,
9024    c: __m512,
9025) -> __m512 {
9026    unsafe {
9027        static_assert_rounding!(ROUNDING);
9028        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
9029        simd_select_bitmask(k, r, _mm512_setzero_ps())
9030    }
9031}
9032
9033/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9034///
9035/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9036/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9037/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9038/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9039/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9040/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9041///
9042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
9043#[inline]
9044#[target_feature(enable = "avx512f")]
9045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9046#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9047#[rustc_legacy_const_generics(4)]
9048pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
9049    a: __m512,
9050    b: __m512,
9051    c: __m512,
9052    k: __mmask16,
9053) -> __m512 {
9054    unsafe {
9055        static_assert_rounding!(ROUNDING);
9056        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
9057        simd_select_bitmask(k, r, c)
9058    }
9059}
9060
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fmsub(a, b, c) is expressed as fmadd(a, b, -c); the backend folds the
        // negation into the fused-multiply-subtract instruction.
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
    }
}
9082
9083/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9084///
9085/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9091///
9092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
9093#[inline]
9094#[target_feature(enable = "avx512f")]
9095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9096#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9097#[rustc_legacy_const_generics(4)]
9098pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
9099    a: __m512d,
9100    k: __mmask8,
9101    b: __m512d,
9102    c: __m512d,
9103) -> __m512d {
9104    unsafe {
9105        static_assert_rounding!(ROUNDING);
9106        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
9107        simd_select_bitmask(k, r, a)
9108    }
9109}
9110
9111/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9112///
9113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9119///
9120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
9121#[inline]
9122#[target_feature(enable = "avx512f")]
9123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9124#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9125#[rustc_legacy_const_generics(4)]
9126pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
9127    k: __mmask8,
9128    a: __m512d,
9129    b: __m512d,
9130    c: __m512d,
9131) -> __m512d {
9132    unsafe {
9133        static_assert_rounding!(ROUNDING);
9134        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
9135        simd_select_bitmask(k, r, _mm512_setzero_pd())
9136    }
9137}
9138
9139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9140///
9141/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9142/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9143/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9144/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9145/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9147///
9148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
9149#[inline]
9150#[target_feature(enable = "avx512f")]
9151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9152#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9153#[rustc_legacy_const_generics(4)]
9154pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
9155    a: __m512d,
9156    b: __m512d,
9157    c: __m512d,
9158    k: __mmask8,
9159) -> __m512d {
9160    unsafe {
9161        static_assert_rounding!(ROUNDING);
9162        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
9163        simd_select_bitmask(k, r, c)
9164    }
9165}
9166
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        // ROUNDING is rejected at compile time unless it is a valid rounding immediate.
        static_assert_rounding!(ROUNDING);
        // Alternating add/sub per lane is handled by the dedicated fmaddsub intrinsic.
        vfmaddsubpsround(a, b, c, ROUNDING)
    }
}
9188
9189/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9190///
9191/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9192/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9193/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9194/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9195/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9196/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9197///
9198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
9199#[inline]
9200#[target_feature(enable = "avx512f")]
9201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9202#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9203#[rustc_legacy_const_generics(4)]
9204pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
9205    a: __m512,
9206    k: __mmask16,
9207    b: __m512,
9208    c: __m512,
9209) -> __m512 {
9210    unsafe {
9211        static_assert_rounding!(ROUNDING);
9212        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
9213    }
9214}
9215
9216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9217///
9218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9224///
9225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
9226#[inline]
9227#[target_feature(enable = "avx512f")]
9228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9229#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9230#[rustc_legacy_const_generics(4)]
9231pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
9232    k: __mmask16,
9233    a: __m512,
9234    b: __m512,
9235    c: __m512,
9236) -> __m512 {
9237    unsafe {
9238        static_assert_rounding!(ROUNDING);
9239        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
9240    }
9241}
9242
9243/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9244///
9245/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9246/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9247/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9248/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9249/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9250/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9251///
9252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
9253#[inline]
9254#[target_feature(enable = "avx512f")]
9255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9256#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9257#[rustc_legacy_const_generics(4)]
9258pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
9259    a: __m512,
9260    b: __m512,
9261    c: __m512,
9262    k: __mmask16,
9263) -> __m512 {
9264    unsafe {
9265        static_assert_rounding!(ROUNDING);
9266        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
9267    }
9268}
9269
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        // ROUNDING is rejected at compile time unless it is a valid rounding immediate.
        static_assert_rounding!(ROUNDING);
        // Alternating add/sub per lane is handled by the dedicated fmaddsub intrinsic.
        vfmaddsubpdround(a, b, c, ROUNDING)
    }
}
9295
9296/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9297///
9298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9304///
9305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9306#[inline]
9307#[target_feature(enable = "avx512f")]
9308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9309#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9310#[rustc_legacy_const_generics(4)]
9311pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9312    a: __m512d,
9313    k: __mmask8,
9314    b: __m512d,
9315    c: __m512d,
9316) -> __m512d {
9317    unsafe {
9318        static_assert_rounding!(ROUNDING);
9319        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9320    }
9321}
9322
9323/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9324///
9325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9331///
9332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9333#[inline]
9334#[target_feature(enable = "avx512f")]
9335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9336#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9337#[rustc_legacy_const_generics(4)]
9338pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9339    k: __mmask8,
9340    a: __m512d,
9341    b: __m512d,
9342    c: __m512d,
9343) -> __m512d {
9344    unsafe {
9345        static_assert_rounding!(ROUNDING);
9346        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9347    }
9348}
9349
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
    }
}
9376
9377/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9378///
9379/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9380/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9381/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9382/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9383/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9384/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9385///
9386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9387#[inline]
9388#[target_feature(enable = "avx512f")]
9389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9390#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9391#[rustc_legacy_const_generics(3)]
9392pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9393    unsafe {
9394        static_assert_rounding!(ROUNDING);
9395        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9396    }
9397}
9398
9399/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9400///
9401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9407///
9408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9409#[inline]
9410#[target_feature(enable = "avx512f")]
9411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9412#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9413#[rustc_legacy_const_generics(4)]
9414pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9415    a: __m512,
9416    k: __mmask16,
9417    b: __m512,
9418    c: __m512,
9419) -> __m512 {
9420    unsafe {
9421        static_assert_rounding!(ROUNDING);
9422        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9423        simd_select_bitmask(k, r, a)
9424    }
9425}
9426
9427/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9428///
9429/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9430/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9431/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9432/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9433/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9434/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9435///
9436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9437#[inline]
9438#[target_feature(enable = "avx512f")]
9439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9440#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9441#[rustc_legacy_const_generics(4)]
9442pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9443    k: __mmask16,
9444    a: __m512,
9445    b: __m512,
9446    c: __m512,
9447) -> __m512 {
9448    unsafe {
9449        static_assert_rounding!(ROUNDING);
9450        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9451        simd_select_bitmask(k, r, _mm512_setzero_ps())
9452    }
9453}
9454
9455/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9456///
9457/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9458/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9459/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9460/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9461/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9462/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9463///
9464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9465#[inline]
9466#[target_feature(enable = "avx512f")]
9467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9468#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9469#[rustc_legacy_const_generics(4)]
9470pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9471    a: __m512,
9472    b: __m512,
9473    c: __m512,
9474    k: __mmask16,
9475) -> __m512 {
9476    unsafe {
9477        static_assert_rounding!(ROUNDING);
9478        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9479        simd_select_bitmask(k, r, c)
9480    }
9481}
9482
9483/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9484///
9485/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9486/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9487/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9488/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9489/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9490/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9491///
9492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9493#[inline]
9494#[target_feature(enable = "avx512f")]
9495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9496#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9497#[rustc_legacy_const_generics(3)]
9498pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9499    a: __m512d,
9500    b: __m512d,
9501    c: __m512d,
9502) -> __m512d {
9503    unsafe {
9504        static_assert_rounding!(ROUNDING);
9505        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9506    }
9507}
9508
9509/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9510///
9511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9517///
9518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9519#[inline]
9520#[target_feature(enable = "avx512f")]
9521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9522#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9523#[rustc_legacy_const_generics(4)]
9524pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9525    a: __m512d,
9526    k: __mmask8,
9527    b: __m512d,
9528    c: __m512d,
9529) -> __m512d {
9530    unsafe {
9531        static_assert_rounding!(ROUNDING);
9532        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9533        simd_select_bitmask(k, r, a)
9534    }
9535}
9536
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}
9564
9565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9566///
9567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9575#[inline]
9576#[target_feature(enable = "avx512f")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9579#[rustc_legacy_const_generics(4)]
9580pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9581    a: __m512d,
9582    b: __m512d,
9583    c: __m512d,
9584    k: __mmask8,
9585) -> __m512d {
9586    unsafe {
9587        static_assert_rounding!(ROUNDING);
9588        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9589        simd_select_bitmask(k, r, c)
9590    }
9591}
9592
9593/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9594///
9595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9601///
9602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9603#[inline]
9604#[target_feature(enable = "avx512f")]
9605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9606#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9607#[rustc_legacy_const_generics(3)]
9608pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9609    unsafe {
9610        static_assert_rounding!(ROUNDING);
9611        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9612    }
9613}
9614
9615/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9616///
9617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9623///
9624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9625#[inline]
9626#[target_feature(enable = "avx512f")]
9627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9628#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9629#[rustc_legacy_const_generics(4)]
9630pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9631    a: __m512,
9632    k: __mmask16,
9633    b: __m512,
9634    c: __m512,
9635) -> __m512 {
9636    unsafe {
9637        static_assert_rounding!(ROUNDING);
9638        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9639        simd_select_bitmask(k, r, a)
9640    }
9641}
9642
9643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9644///
9645/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9651///
9652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9653#[inline]
9654#[target_feature(enable = "avx512f")]
9655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9656#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9657#[rustc_legacy_const_generics(4)]
9658pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9659    k: __mmask16,
9660    a: __m512,
9661    b: __m512,
9662    c: __m512,
9663) -> __m512 {
9664    unsafe {
9665        static_assert_rounding!(ROUNDING);
9666        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9667        simd_select_bitmask(k, r, _mm512_setzero_ps())
9668    }
9669}
9670
9671/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9672///
9673/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9674/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9675/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9676/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9677/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9678/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9679///
9680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9681#[inline]
9682#[target_feature(enable = "avx512f")]
9683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9684#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9685#[rustc_legacy_const_generics(4)]
9686pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9687    a: __m512,
9688    b: __m512,
9689    c: __m512,
9690    k: __mmask16,
9691) -> __m512 {
9692    unsafe {
9693        static_assert_rounding!(ROUNDING);
9694        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9695        simd_select_bitmask(k, r, c)
9696    }
9697}
9698
9699/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9700///
9701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9707///
9708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9709#[inline]
9710#[target_feature(enable = "avx512f")]
9711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9712#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9713#[rustc_legacy_const_generics(3)]
9714pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9715    unsafe {
9716        static_assert_rounding!(ROUNDING);
9717        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9718    }
9719}
9720
9721/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9722///
9723/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9724/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9725/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9726/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9727/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9728/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9729///
9730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9731#[inline]
9732#[target_feature(enable = "avx512f")]
9733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9734#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9735#[rustc_legacy_const_generics(4)]
9736pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9737    a: __m512d,
9738    k: __mmask8,
9739    b: __m512d,
9740    c: __m512d,
9741) -> __m512d {
9742    unsafe {
9743        static_assert_rounding!(ROUNDING);
9744        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9745        simd_select_bitmask(k, r, a)
9746    }
9747}
9748
9749/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9750///
9751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9765    k: __mmask8,
9766    a: __m512d,
9767    b: __m512d,
9768    c: __m512d,
9769) -> __m512d {
9770    unsafe {
9771        static_assert_rounding!(ROUNDING);
9772        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9773        simd_select_bitmask(k, r, _mm512_setzero_pd())
9774    }
9775}
9776
9777/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9778///
9779/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9780/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9781/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9782/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9783/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9784/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9785///
9786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9787#[inline]
9788#[target_feature(enable = "avx512f")]
9789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9790#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9791#[rustc_legacy_const_generics(4)]
9792pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9793    a: __m512d,
9794    b: __m512d,
9795    c: __m512d,
9796    k: __mmask8,
9797) -> __m512d {
9798    unsafe {
9799        static_assert_rounding!(ROUNDING);
9800        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9801        simd_select_bitmask(k, r, c)
9802    }
9803}
9804
9805/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9806///
9807/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9808/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9809/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9810/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9811/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9812/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9813///
9814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9815#[inline]
9816#[target_feature(enable = "avx512f")]
9817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9818#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9819#[rustc_legacy_const_generics(3)]
9820pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9821    unsafe {
9822        static_assert_rounding!(ROUNDING);
9823        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9824    }
9825}
9826
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        // Merge-mask: lanes whose bit in `k` is clear are copied from `a`.
        simd_select_bitmask(k, r, a)
    }
}
9854
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        // Zero-mask: lanes whose bit in `k` is clear are set to 0.0.
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}
9882
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        // mask3 variant: lanes whose bit in `k` is clear are copied from `c`.
        simd_select_bitmask(k, r, c)
    }
}
9910
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
    }
}
9932
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        // Merge-mask: lanes whose bit in `k` is clear are copied from `a`.
        simd_select_bitmask(k, r, a)
    }
}
9960
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        // Zero-mask: lanes whose bit in `k` is clear are set to 0.0.
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}
9988
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // fnmsub(a, b, c) = -(a*b) - c, expressed as fma(-a, b, -c).
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        // mask3 variant: lanes whose bit in `k` is clear are copied from `c`.
        simd_select_bitmask(k, r, c)
    }
}
10016
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // SAE may only be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Lane-wise maximum; SAE controls exception suppression.
        let r = vmaxps(a, b, SAE);
        transmute(r)
    }
}
10035
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_max_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        // Merge-mask: lanes whose bit in `k` is clear are copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}
10059
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        // Zero-mask: lanes whose bit in `k` is clear are set to 0.0.
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
10078
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        // SAE may only be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // Lane-wise maximum; SAE controls exception suppression.
        let r = vmaxpd(a, b, SAE);
        transmute(r)
    }
}
10097
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_max_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        // Merge-mask: lanes whose bit in `k` is clear are copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}
10121
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        // Zero-mask: lanes whose bit in `k` is clear are set to 0.0.
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}
10140
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // SAE may only be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Lane-wise minimum; SAE controls exception suppression.
        let r = vminps(a, b, SAE);
        transmute(r)
    }
}
10159
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_min_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        // Merge-mask: lanes whose bit in `k` is clear are copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}
10183
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        // Zero-mask: lanes whose bit in `k` is clear are set to 0.0.
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
10202
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        // SAE may only be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // Lane-wise minimum; SAE controls exception suppression.
        let r = vminpd(a, b, SAE);
        transmute(r)
    }
}
10221
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_min_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        // Merge-mask: lanes whose bit in `k` is clear are copied from `src`.
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}
10245
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        // Zero-mask: lanes whose bit in `k` is clear are set to 0.0.
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}
10264
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        // Unmasked variant: all 16 mask bits set, so every lane is computed and
        // the zero pass-through operand is never selected.
        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
10282
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        // The intrinsic performs the merge itself: lanes whose bit in `k` is
        // clear come from `src`.
        let r = vgetexpps(a, src, k, SAE);
        transmute(r)
    }
}
10301
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        // Zero-masking is expressed by passing a zero pass-through vector:
        // lanes whose bit in `k` is clear become 0.0.
        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}
10319
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        // Unmasked variant: all 8 mask bits set, so every lane is computed and
        // the zero pass-through operand is never selected.
        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}
10337
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        // The intrinsic performs the merge itself: lanes whose bit in `k` is
        // clear come from `src`.
        let r = vgetexppd(a, src, k, SAE);
        transmute(r)
    }
}
10360
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        // Zero-masking is expressed by passing a zero pass-through vector:
        // lanes whose bit in `k` is clear become 0.0.
        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}
10378
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
    unsafe {
        // IMM8 must fit in 8 bits; SAE is restricted by the mantissas-sae check.
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        // Unmasked variant: all 16 mask bits set, zero pass-through never used.
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
10403
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        // The intrinsic performs the merge itself: lanes whose bit in `k` is
        // clear come from `src`.
        let r = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
    }
}
10433
10434/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10435/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10436/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10437/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10438/// * [`_MM_FROUND_TO_POS_INF`] : round up
10439/// * [`_MM_FROUND_TO_ZERO`] : truncate
10440/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10441///
10442/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10444#[inline]
10445#[target_feature(enable = "avx512f")]
10446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10447#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10448#[rustc_legacy_const_generics(2, 3)]
10449pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10450    k: __mmask16,
10451    a: __m512,
10452) -> __m512 {
10453    unsafe {
10454        static_assert_uimm_bits!(IMM8, 8);
10455        static_assert_mantissas_sae!(SAE);
10456        let a = a.as_f32x16();
10457        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
10458        transmute(r)
10459    }
10460}
10461
10462/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10463/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10464/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10465/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10466/// * [`_MM_FROUND_TO_POS_INF`] : round up
10467/// * [`_MM_FROUND_TO_ZERO`] : truncate
10468/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10469///
10470/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10472#[inline]
10473#[target_feature(enable = "avx512f")]
10474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10475#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10476#[rustc_legacy_const_generics(1, 2)]
10477pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10478    unsafe {
10479        static_assert_uimm_bits!(IMM8, 8);
10480        static_assert_mantissas_sae!(SAE);
10481        let a = a.as_f64x8();
10482        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
10483        transmute(r)
10484    }
10485}
10486
10487/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10488/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10489/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10490/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10491/// * [`_MM_FROUND_TO_POS_INF`] : round up
10492/// * [`_MM_FROUND_TO_ZERO`] : truncate
10493/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10494///
10495/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10497#[inline]
10498#[target_feature(enable = "avx512f")]
10499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10500#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10501#[rustc_legacy_const_generics(3, 4)]
10502pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10503    src: __m512d,
10504    k: __mmask8,
10505    a: __m512d,
10506) -> __m512d {
10507    unsafe {
10508        static_assert_uimm_bits!(IMM8, 8);
10509        static_assert_mantissas_sae!(SAE);
10510        let a = a.as_f64x8();
10511        let src = src.as_f64x8();
10512        let r = vrndscalepd(a, IMM8, src, k, SAE);
10513        transmute(r)
10514    }
10515}
10516
10517/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10518/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10521/// * [`_MM_FROUND_TO_POS_INF`] : round up
10522/// * [`_MM_FROUND_TO_ZERO`] : truncate
10523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10524///
10525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10527#[inline]
10528#[target_feature(enable = "avx512f")]
10529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10530#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10531#[rustc_legacy_const_generics(2, 3)]
10532pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10533    k: __mmask8,
10534    a: __m512d,
10535) -> __m512d {
10536    unsafe {
10537        static_assert_uimm_bits!(IMM8, 8);
10538        static_assert_mantissas_sae!(SAE);
10539        let a = a.as_f64x8();
10540        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
10541        transmute(r)
10542    }
10543}
10544
10545/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10546///
10547/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10553///
10554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
10555#[inline]
10556#[target_feature(enable = "avx512f")]
10557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10558#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10559#[rustc_legacy_const_generics(2)]
10560pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10561    unsafe {
10562        static_assert_rounding!(ROUNDING);
10563        let a = a.as_f32x16();
10564        let b = b.as_f32x16();
10565        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
10566        transmute(r)
10567    }
10568}
10569
10570/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10571///
10572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10578///
10579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10580#[inline]
10581#[target_feature(enable = "avx512f")]
10582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10583#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10584#[rustc_legacy_const_generics(4)]
10585pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10586    src: __m512,
10587    k: __mmask16,
10588    a: __m512,
10589    b: __m512,
10590) -> __m512 {
10591    unsafe {
10592        static_assert_rounding!(ROUNDING);
10593        let a = a.as_f32x16();
10594        let b = b.as_f32x16();
10595        let src = src.as_f32x16();
10596        let r = vscalefps(a, b, src, k, ROUNDING);
10597        transmute(r)
10598    }
10599}
10600
10601/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10602///
10603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10609///
10610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10611#[inline]
10612#[target_feature(enable = "avx512f")]
10613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10614#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10615#[rustc_legacy_const_generics(3)]
10616pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10617    k: __mmask16,
10618    a: __m512,
10619    b: __m512,
10620) -> __m512 {
10621    unsafe {
10622        static_assert_rounding!(ROUNDING);
10623        let a = a.as_f32x16();
10624        let b = b.as_f32x16();
10625        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
10626        transmute(r)
10627    }
10628}
10629
10630/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10631///
10632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10638///
10639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10640#[inline]
10641#[target_feature(enable = "avx512f")]
10642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10643#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10644#[rustc_legacy_const_generics(2)]
10645pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10646    unsafe {
10647        static_assert_rounding!(ROUNDING);
10648        let a = a.as_f64x8();
10649        let b = b.as_f64x8();
10650        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
10651        transmute(r)
10652    }
10653}
10654
10655/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10656///
10657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10663///
10664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10665#[inline]
10666#[target_feature(enable = "avx512f")]
10667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10668#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10669#[rustc_legacy_const_generics(4)]
10670pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10671    src: __m512d,
10672    k: __mmask8,
10673    a: __m512d,
10674    b: __m512d,
10675) -> __m512d {
10676    unsafe {
10677        static_assert_rounding!(ROUNDING);
10678        let a = a.as_f64x8();
10679        let b = b.as_f64x8();
10680        let src = src.as_f64x8();
10681        let r = vscalefpd(a, b, src, k, ROUNDING);
10682        transmute(r)
10683    }
10684}
10685
10686/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10687///
10688/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10689/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10690/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10691/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10692/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10693/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10694///
10695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10696#[inline]
10697#[target_feature(enable = "avx512f")]
10698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10699#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10700#[rustc_legacy_const_generics(3)]
10701pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10702    k: __mmask8,
10703    a: __m512d,
10704    b: __m512d,
10705) -> __m512d {
10706    unsafe {
10707        static_assert_rounding!(ROUNDING);
10708        let a = a.as_f64x8();
10709        let b = b.as_f64x8();
10710        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
10711        transmute(r)
10712    }
10713}
10714
10715/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10716///
10717/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10719#[inline]
10720#[target_feature(enable = "avx512f")]
10721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10722#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10723#[rustc_legacy_const_generics(3, 4)]
10724pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10725    a: __m512,
10726    b: __m512,
10727    c: __m512i,
10728) -> __m512 {
10729    unsafe {
10730        static_assert_uimm_bits!(IMM8, 8);
10731        static_assert_mantissas_sae!(SAE);
10732        let a = a.as_f32x16();
10733        let b = b.as_f32x16();
10734        let c = c.as_i32x16();
10735        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
10736        transmute(r)
10737    }
10738}
10739
10740/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10741///
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10744#[inline]
10745#[target_feature(enable = "avx512f")]
10746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10747#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10748#[rustc_legacy_const_generics(4, 5)]
10749pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10750    a: __m512,
10751    k: __mmask16,
10752    b: __m512,
10753    c: __m512i,
10754) -> __m512 {
10755    unsafe {
10756        static_assert_uimm_bits!(IMM8, 8);
10757        static_assert_mantissas_sae!(SAE);
10758        let a = a.as_f32x16();
10759        let b = b.as_f32x16();
10760        let c = c.as_i32x16();
10761        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
10762        transmute(r)
10763    }
10764}
10765
10766/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10767///
10768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10770#[inline]
10771#[target_feature(enable = "avx512f")]
10772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10773#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10774#[rustc_legacy_const_generics(4, 5)]
10775pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10776    k: __mmask16,
10777    a: __m512,
10778    b: __m512,
10779    c: __m512i,
10780) -> __m512 {
10781    unsafe {
10782        static_assert_uimm_bits!(IMM8, 8);
10783        static_assert_mantissas_sae!(SAE);
10784        let a = a.as_f32x16();
10785        let b = b.as_f32x16();
10786        let c = c.as_i32x16();
10787        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
10788        transmute(r)
10789    }
10790}
10791
10792/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10793///
10794/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10796#[inline]
10797#[target_feature(enable = "avx512f")]
10798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10799#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10800#[rustc_legacy_const_generics(3, 4)]
10801pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10802    a: __m512d,
10803    b: __m512d,
10804    c: __m512i,
10805) -> __m512d {
10806    unsafe {
10807        static_assert_uimm_bits!(IMM8, 8);
10808        static_assert_mantissas_sae!(SAE);
10809        let a = a.as_f64x8();
10810        let b = b.as_f64x8();
10811        let c = c.as_i64x8();
10812        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10813        transmute(r)
10814    }
10815}
10816
10817/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10818///
10819/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10821#[inline]
10822#[target_feature(enable = "avx512f")]
10823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10824#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10825#[rustc_legacy_const_generics(4, 5)]
10826pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10827    a: __m512d,
10828    k: __mmask8,
10829    b: __m512d,
10830    c: __m512i,
10831) -> __m512d {
10832    unsafe {
10833        static_assert_uimm_bits!(IMM8, 8);
10834        static_assert_mantissas_sae!(SAE);
10835        let a = a.as_f64x8();
10836        let b = b.as_f64x8();
10837        let c = c.as_i64x8();
10838        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10839        transmute(r)
10840    }
10841}
10842
10843/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10844///
10845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10847#[inline]
10848#[target_feature(enable = "avx512f")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10851#[rustc_legacy_const_generics(4, 5)]
10852pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10853    k: __mmask8,
10854    a: __m512d,
10855    b: __m512d,
10856    c: __m512i,
10857) -> __m512d {
10858    unsafe {
10859        static_assert_uimm_bits!(IMM8, 8);
10860        static_assert_mantissas_sae!(SAE);
10861        let a = a.as_f64x8();
10862        let b = b.as_f64x8();
10863        let c = c.as_i64x8();
10864        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10865        transmute(r)
10866    }
10867}
10868
10869/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10870/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10871///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10872///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10873///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10874///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10875/// The sign is determined by sc which can take the following values:\
10876///    _MM_MANT_SIGN_src     // sign = sign(src)\
10877///    _MM_MANT_SIGN_zero    // sign = 0\
10878///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10879/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10880///
10881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
10882#[inline]
10883#[target_feature(enable = "avx512f")]
10884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10885#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10886#[rustc_legacy_const_generics(1, 2, 3)]
10887pub fn _mm512_getmant_round_ps<
10888    const NORM: _MM_MANTISSA_NORM_ENUM,
10889    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10890    const SAE: i32,
10891>(
10892    a: __m512,
10893) -> __m512 {
10894    unsafe {
10895        static_assert_uimm_bits!(NORM, 4);
10896        static_assert_uimm_bits!(SIGN, 2);
10897        static_assert_mantissas_sae!(SAE);
10898        let a = a.as_f32x16();
10899        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
10900        transmute(r)
10901    }
10902}
10903
10904/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10905/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10906///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10907///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10908///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10909///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10910/// The sign is determined by sc which can take the following values:\
10911///    _MM_MANT_SIGN_src     // sign = sign(src)\
10912///    _MM_MANT_SIGN_zero    // sign = 0\
10913///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10914/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10915///
10916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10917#[inline]
10918#[target_feature(enable = "avx512f")]
10919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10920#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10921#[rustc_legacy_const_generics(3, 4, 5)]
10922pub fn _mm512_mask_getmant_round_ps<
10923    const NORM: _MM_MANTISSA_NORM_ENUM,
10924    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10925    const SAE: i32,
10926>(
10927    src: __m512,
10928    k: __mmask16,
10929    a: __m512,
10930) -> __m512 {
10931    unsafe {
10932        static_assert_uimm_bits!(NORM, 4);
10933        static_assert_uimm_bits!(SIGN, 2);
10934        static_assert_mantissas_sae!(SAE);
10935        let a = a.as_f32x16();
10936        let src = src.as_f32x16();
10937        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
10938        transmute(r)
10939    }
10940}
10941
10942/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10943/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10944///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10945///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10946///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10947///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10948/// The sign is determined by sc which can take the following values:\
10949///    _MM_MANT_SIGN_src     // sign = sign(src)\
10950///    _MM_MANT_SIGN_zero    // sign = 0\
10951///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10952/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10958#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10959#[rustc_legacy_const_generics(2, 3, 4)]
10960pub fn _mm512_maskz_getmant_round_ps<
10961    const NORM: _MM_MANTISSA_NORM_ENUM,
10962    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10963    const SAE: i32,
10964>(
10965    k: __mmask16,
10966    a: __m512,
10967) -> __m512 {
10968    unsafe {
10969        static_assert_uimm_bits!(NORM, 4);
10970        static_assert_uimm_bits!(SIGN, 2);
10971        static_assert_mantissas_sae!(SAE);
10972        let a = a.as_f32x16();
10973        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
10974        transmute(r)
10975    }
10976}
10977
10978/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10980///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10981///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10982///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10983///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10984/// The sign is determined by sc which can take the following values:\
10985///    _MM_MANT_SIGN_src     // sign = sign(src)\
10986///    _MM_MANT_SIGN_zero    // sign = 0\
10987///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10989///
10990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10991#[inline]
10992#[target_feature(enable = "avx512f")]
10993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10994#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10995#[rustc_legacy_const_generics(1, 2, 3)]
10996pub fn _mm512_getmant_round_pd<
10997    const NORM: _MM_MANTISSA_NORM_ENUM,
10998    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10999    const SAE: i32,
11000>(
11001    a: __m512d,
11002) -> __m512d {
11003    unsafe {
11004        static_assert_uimm_bits!(NORM, 4);
11005        static_assert_uimm_bits!(SIGN, 2);
11006        static_assert_mantissas_sae!(SAE);
11007        let a = a.as_f64x8();
11008        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
11009        transmute(r)
11010    }
11011}
11012
11013/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11014/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11015///    _MM_MANT_NORM_1_2     // interval [1, 2)\
11016///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
11017///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
11018///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11019/// The sign is determined by sc which can take the following values:\
11020///    _MM_MANT_SIGN_src     // sign = sign(src)\
11021///    _MM_MANT_SIGN_zero    // sign = 0\
11022///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
11023/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11024///
11025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
11026#[inline]
11027#[target_feature(enable = "avx512f")]
11028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11029#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11030#[rustc_legacy_const_generics(3, 4, 5)]
11031pub fn _mm512_mask_getmant_round_pd<
11032    const NORM: _MM_MANTISSA_NORM_ENUM,
11033    const SIGN: _MM_MANTISSA_SIGN_ENUM,
11034    const SAE: i32,
11035>(
11036    src: __m512d,
11037    k: __mmask8,
11038    a: __m512d,
11039) -> __m512d {
11040    unsafe {
11041        static_assert_uimm_bits!(NORM, 4);
11042        static_assert_uimm_bits!(SIGN, 2);
11043        static_assert_mantissas_sae!(SAE);
11044        let a = a.as_f64x8();
11045        let src = src.as_f64x8();
11046        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
11047        transmute(r)
11048    }
11049}
11050
11051/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11052/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11053///    _MM_MANT_NORM_1_2     // interval [1, 2)\
11054///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
11055///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
11056///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11057/// The sign is determined by sc which can take the following values:\
11058///    _MM_MANT_SIGN_src     // sign = sign(src)\
11059///    _MM_MANT_SIGN_zero    // sign = 0\
11060///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
11061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11062///
11063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
11064#[inline]
11065#[target_feature(enable = "avx512f")]
11066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11067#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11068#[rustc_legacy_const_generics(2, 3, 4)]
11069pub fn _mm512_maskz_getmant_round_pd<
11070    const NORM: _MM_MANTISSA_NORM_ENUM,
11071    const SIGN: _MM_MANTISSA_SIGN_ENUM,
11072    const SAE: i32,
11073>(
11074    k: __mmask8,
11075    a: __m512d,
11076) -> __m512d {
11077    unsafe {
11078        static_assert_uimm_bits!(NORM, 4);
11079        static_assert_uimm_bits!(SIGN, 2);
11080        static_assert_mantissas_sae!(SAE);
11081        let a = a.as_f64x8();
11082        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
11083        transmute(r)
11084    }
11085}
11086
11087/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11088///
11089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
11090#[inline]
11091#[target_feature(enable = "avx512f")]
11092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11093#[cfg_attr(test, assert_instr(vcvtps2dq))]
11094pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
11095    unsafe {
11096        transmute(vcvtps2dq(
11097            a.as_f32x16(),
11098            i32x16::ZERO,
11099            0b11111111_11111111,
11100            _MM_FROUND_CUR_DIRECTION,
11101        ))
11102    }
11103}
11104
11105/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11106///
11107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
11108#[inline]
11109#[target_feature(enable = "avx512f")]
11110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11111#[cfg_attr(test, assert_instr(vcvtps2dq))]
11112pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
11113    unsafe {
11114        transmute(vcvtps2dq(
11115            a.as_f32x16(),
11116            src.as_i32x16(),
11117            k,
11118            _MM_FROUND_CUR_DIRECTION,
11119        ))
11120    }
11121}
11122
11123/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11124///
11125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
11126#[inline]
11127#[target_feature(enable = "avx512f")]
11128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11129#[cfg_attr(test, assert_instr(vcvtps2dq))]
11130pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
11131    unsafe {
11132        transmute(vcvtps2dq(
11133            a.as_f32x16(),
11134            i32x16::ZERO,
11135            k,
11136            _MM_FROUND_CUR_DIRECTION,
11137        ))
11138    }
11139}
11140
11141/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11142///
11143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
11144#[inline]
11145#[target_feature(enable = "avx512f,avx512vl")]
11146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11147#[cfg_attr(test, assert_instr(vcvtps2dq))]
11148pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11149    unsafe {
11150        let convert = _mm256_cvtps_epi32(a);
11151        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
11152    }
11153}
11154
11155/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11156///
11157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
11158#[inline]
11159#[target_feature(enable = "avx512f,avx512vl")]
11160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11161#[cfg_attr(test, assert_instr(vcvtps2dq))]
11162pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
11163    unsafe {
11164        let convert = _mm256_cvtps_epi32(a);
11165        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
11166    }
11167}
11168
11169/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11170///
11171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
11172#[inline]
11173#[target_feature(enable = "avx512f,avx512vl")]
11174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11175#[cfg_attr(test, assert_instr(vcvtps2dq))]
11176pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11177    unsafe {
11178        let convert = _mm_cvtps_epi32(a);
11179        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11180    }
11181}
11182
11183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11184///
11185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
11186#[inline]
11187#[target_feature(enable = "avx512f,avx512vl")]
11188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11189#[cfg_attr(test, assert_instr(vcvtps2dq))]
11190pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
11191    unsafe {
11192        let convert = _mm_cvtps_epi32(a);
11193        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11194    }
11195}
11196
11197/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11198///
11199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
11200#[inline]
11201#[target_feature(enable = "avx512f")]
11202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11203#[cfg_attr(test, assert_instr(vcvtps2udq))]
11204pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
11205    unsafe {
11206        transmute(vcvtps2udq(
11207            a.as_f32x16(),
11208            u32x16::ZERO,
11209            0b11111111_11111111,
11210            _MM_FROUND_CUR_DIRECTION,
11211        ))
11212    }
11213}
11214
11215/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11216///
11217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
11218#[inline]
11219#[target_feature(enable = "avx512f")]
11220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11221#[cfg_attr(test, assert_instr(vcvtps2udq))]
11222pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
11223    unsafe {
11224        transmute(vcvtps2udq(
11225            a.as_f32x16(),
11226            src.as_u32x16(),
11227            k,
11228            _MM_FROUND_CUR_DIRECTION,
11229        ))
11230    }
11231}
11232
11233/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11234///
11235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
11236#[inline]
11237#[target_feature(enable = "avx512f")]
11238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11239#[cfg_attr(test, assert_instr(vcvtps2udq))]
11240pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
11241    unsafe {
11242        transmute(vcvtps2udq(
11243            a.as_f32x16(),
11244            u32x16::ZERO,
11245            k,
11246            _MM_FROUND_CUR_DIRECTION,
11247        ))
11248    }
11249}
11250
11251/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11252///
11253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
11254#[inline]
11255#[target_feature(enable = "avx512f,avx512vl")]
11256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11257#[cfg_attr(test, assert_instr(vcvtps2udq))]
11258pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
11259    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
11260}
11261
11262/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11263///
11264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
11265#[inline]
11266#[target_feature(enable = "avx512f,avx512vl")]
11267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11268#[cfg_attr(test, assert_instr(vcvtps2udq))]
11269pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11270    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
11271}
11272
11273/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11274///
11275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
11276#[inline]
11277#[target_feature(enable = "avx512f,avx512vl")]
11278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11279#[cfg_attr(test, assert_instr(vcvtps2udq))]
11280pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11281    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11282}
11283
11284/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11285///
11286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11287#[inline]
11288#[target_feature(enable = "avx512f,avx512vl")]
11289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11290#[cfg_attr(test, assert_instr(vcvtps2udq))]
11291pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11292    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11293}
11294
11295/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11296///
11297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11298#[inline]
11299#[target_feature(enable = "avx512f,avx512vl")]
11300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11301#[cfg_attr(test, assert_instr(vcvtps2udq))]
11302pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11303    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11304}
11305
11306/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11307///
11308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11309#[inline]
11310#[target_feature(enable = "avx512f,avx512vl")]
11311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11312#[cfg_attr(test, assert_instr(vcvtps2udq))]
11313pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11314    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11315}
11316
11317/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11318///
11319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11320#[inline]
11321#[target_feature(enable = "avx512f")]
11322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11323#[cfg_attr(test, assert_instr(vcvtps2pd))]
11324pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11325    unsafe {
11326        transmute(vcvtps2pd(
11327            a.as_f32x8(),
11328            f64x8::ZERO,
11329            0b11111111,
11330            _MM_FROUND_CUR_DIRECTION,
11331        ))
11332    }
11333}
11334
11335/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11336///
11337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11338#[inline]
11339#[target_feature(enable = "avx512f")]
11340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11341#[cfg_attr(test, assert_instr(vcvtps2pd))]
11342pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11343    unsafe {
11344        transmute(vcvtps2pd(
11345            a.as_f32x8(),
11346            src.as_f64x8(),
11347            k,
11348            _MM_FROUND_CUR_DIRECTION,
11349        ))
11350    }
11351}
11352
11353/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11354///
11355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11356#[inline]
11357#[target_feature(enable = "avx512f")]
11358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11359#[cfg_attr(test, assert_instr(vcvtps2pd))]
11360pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11361    unsafe {
11362        transmute(vcvtps2pd(
11363            a.as_f32x8(),
11364            f64x8::ZERO,
11365            k,
11366            _MM_FROUND_CUR_DIRECTION,
11367        ))
11368    }
11369}
11370
11371/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11372///
11373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11374#[inline]
11375#[target_feature(enable = "avx512f")]
11376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11377#[cfg_attr(test, assert_instr(vcvtps2pd))]
11378pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11379    unsafe {
11380        transmute(vcvtps2pd(
11381            _mm512_castps512_ps256(v2).as_f32x8(),
11382            f64x8::ZERO,
11383            0b11111111,
11384            _MM_FROUND_CUR_DIRECTION,
11385        ))
11386    }
11387}
11388
11389/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11390///
11391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11392#[inline]
11393#[target_feature(enable = "avx512f")]
11394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11395#[cfg_attr(test, assert_instr(vcvtps2pd))]
11396pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11397    unsafe {
11398        transmute(vcvtps2pd(
11399            _mm512_castps512_ps256(v2).as_f32x8(),
11400            src.as_f64x8(),
11401            k,
11402            _MM_FROUND_CUR_DIRECTION,
11403        ))
11404    }
11405}
11406
11407/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11408///
11409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11410#[inline]
11411#[target_feature(enable = "avx512f")]
11412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11413#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11414pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11415    unsafe {
11416        transmute(vcvtpd2ps(
11417            a.as_f64x8(),
11418            f32x8::ZERO,
11419            0b11111111,
11420            _MM_FROUND_CUR_DIRECTION,
11421        ))
11422    }
11423}
11424
11425/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11426///
11427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11428#[inline]
11429#[target_feature(enable = "avx512f")]
11430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11431#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11432pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11433    unsafe {
11434        transmute(vcvtpd2ps(
11435            a.as_f64x8(),
11436            src.as_f32x8(),
11437            k,
11438            _MM_FROUND_CUR_DIRECTION,
11439        ))
11440    }
11441}
11442
11443/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11444///
11445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11446#[inline]
11447#[target_feature(enable = "avx512f")]
11448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11449#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11450pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11451    unsafe {
11452        transmute(vcvtpd2ps(
11453            a.as_f64x8(),
11454            f32x8::ZERO,
11455            k,
11456            _MM_FROUND_CUR_DIRECTION,
11457        ))
11458    }
11459}
11460
11461/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11462///
11463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11464#[inline]
11465#[target_feature(enable = "avx512f,avx512vl")]
11466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11467#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11468pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11469    unsafe {
11470        let convert = _mm256_cvtpd_ps(a);
11471        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11472    }
11473}
11474
11475/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11476///
11477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11478#[inline]
11479#[target_feature(enable = "avx512f,avx512vl")]
11480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11481#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11482pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11483    unsafe {
11484        let convert = _mm256_cvtpd_ps(a);
11485        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11486    }
11487}
11488
11489/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11492#[inline]
11493#[target_feature(enable = "avx512f,avx512vl")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11497    unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), k).as_m128() }
11498}
11499
11500/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11501///
11502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11503#[inline]
11504#[target_feature(enable = "avx512f,avx512vl")]
11505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11506#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11507pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11508    unsafe {
11509        let convert = _mm_cvtpd_ps(a);
11510        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11511    }
11512}
11513
11514/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11515///
11516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11517#[inline]
11518#[target_feature(enable = "avx512f")]
11519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11520#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11521pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11522    unsafe {
11523        transmute(vcvtpd2dq(
11524            a.as_f64x8(),
11525            i32x8::ZERO,
11526            0b11111111,
11527            _MM_FROUND_CUR_DIRECTION,
11528        ))
11529    }
11530}
11531
11532/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11533///
11534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11535#[inline]
11536#[target_feature(enable = "avx512f")]
11537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11538#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11539pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11540    unsafe {
11541        transmute(vcvtpd2dq(
11542            a.as_f64x8(),
11543            src.as_i32x8(),
11544            k,
11545            _MM_FROUND_CUR_DIRECTION,
11546        ))
11547    }
11548}
11549
11550/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11551///
11552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11553#[inline]
11554#[target_feature(enable = "avx512f")]
11555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11556#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11557pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11558    unsafe {
11559        transmute(vcvtpd2dq(
11560            a.as_f64x8(),
11561            i32x8::ZERO,
11562            k,
11563            _MM_FROUND_CUR_DIRECTION,
11564        ))
11565    }
11566}
11567
11568/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11575pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11576    unsafe {
11577        let convert = _mm256_cvtpd_epi32(a);
11578        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11579    }
11580}
11581
11582/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11589pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11590    unsafe {
11591        let convert = _mm256_cvtpd_epi32(a);
11592        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11593    }
11594}
11595
11596/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11603pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11604    unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
11605}
11606
11607/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11608///
11609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11610#[inline]
11611#[target_feature(enable = "avx512f,avx512vl")]
11612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11613#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11614pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11615    unsafe {
11616        let convert = _mm_cvtpd_epi32(a);
11617        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11618    }
11619}
11620
11621/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11622///
11623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11624#[inline]
11625#[target_feature(enable = "avx512f")]
11626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11627#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11628pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11629    unsafe {
11630        transmute(vcvtpd2udq(
11631            a.as_f64x8(),
11632            u32x8::ZERO,
11633            0b11111111,
11634            _MM_FROUND_CUR_DIRECTION,
11635        ))
11636    }
11637}
11638
11639/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11645#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11646pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11647    unsafe {
11648        transmute(vcvtpd2udq(
11649            a.as_f64x8(),
11650            src.as_u32x8(),
11651            k,
11652            _MM_FROUND_CUR_DIRECTION,
11653        ))
11654    }
11655}
11656
11657/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11658///
11659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11660#[inline]
11661#[target_feature(enable = "avx512f")]
11662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11663#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11664pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11665    unsafe {
11666        transmute(vcvtpd2udq(
11667            a.as_f64x8(),
11668            u32x8::ZERO,
11669            k,
11670            _MM_FROUND_CUR_DIRECTION,
11671        ))
11672    }
11673}
11674
11675/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11676///
11677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11678#[inline]
11679#[target_feature(enable = "avx512f,avx512vl")]
11680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11681#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11682pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11683    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11684}
11685
11686/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11687///
11688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11689#[inline]
11690#[target_feature(enable = "avx512f,avx512vl")]
11691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11692#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11693pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11694    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11695}
11696
11697/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11698///
11699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11700#[inline]
11701#[target_feature(enable = "avx512f,avx512vl")]
11702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11703#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11704pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11705    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11706}
11707
11708/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11709///
11710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11711#[inline]
11712#[target_feature(enable = "avx512f,avx512vl")]
11713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11714#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11715pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11716    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11717}
11718
11719/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11720///
11721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11722#[inline]
11723#[target_feature(enable = "avx512f,avx512vl")]
11724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11725#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11726pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11727    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11728}
11729
11730/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11731///
11732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11733#[inline]
11734#[target_feature(enable = "avx512f,avx512vl")]
11735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11736#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11737pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11738    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11739}
11740
11741/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11742///
11743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11744#[inline]
11745#[target_feature(enable = "avx512f")]
11746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11747#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11748pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11749    unsafe {
11750        let r: f32x8 = vcvtpd2ps(
11751            v2.as_f64x8(),
11752            f32x8::ZERO,
11753            0b11111111,
11754            _MM_FROUND_CUR_DIRECTION,
11755        );
11756        simd_shuffle!(
11757            r,
11758            f32x8::ZERO,
11759            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11760        )
11761    }
11762}
11763
11764/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11765///
11766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11767#[inline]
11768#[target_feature(enable = "avx512f")]
11769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11770#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11771pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11772    unsafe {
11773        let r: f32x8 = vcvtpd2ps(
11774            v2.as_f64x8(),
11775            _mm512_castps512_ps256(src).as_f32x8(),
11776            k,
11777            _MM_FROUND_CUR_DIRECTION,
11778        );
11779        simd_shuffle!(
11780            r,
11781            f32x8::ZERO,
11782            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11783        )
11784    }
11785}
11786
11787/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11788///
11789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11790#[inline]
11791#[target_feature(enable = "avx512f")]
11792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11793#[cfg_attr(test, assert_instr(vpmovsxbd))]
11794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11795pub const fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11796    unsafe {
11797        let a = a.as_i8x16();
11798        transmute::<i32x16, _>(simd_cast(a))
11799    }
11800}
11801
11802/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11803///
11804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11805#[inline]
11806#[target_feature(enable = "avx512f")]
11807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11808#[cfg_attr(test, assert_instr(vpmovsxbd))]
11809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11810pub const fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11811    unsafe {
11812        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11813        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11814    }
11815}
11816
11817/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11818///
11819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11820#[inline]
11821#[target_feature(enable = "avx512f")]
11822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11823#[cfg_attr(test, assert_instr(vpmovsxbd))]
11824#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11825pub const fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11826    unsafe {
11827        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11828        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11829    }
11830}
11831
11832/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11833///
11834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11835#[inline]
11836#[target_feature(enable = "avx512f,avx512vl")]
11837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11838#[cfg_attr(test, assert_instr(vpmovsxbd))]
11839#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11840pub const fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11841    unsafe {
11842        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11843        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11844    }
11845}
11846
11847/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11848///
11849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11850#[inline]
11851#[target_feature(enable = "avx512f,avx512vl")]
11852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11853#[cfg_attr(test, assert_instr(vpmovsxbd))]
11854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11855pub const fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11856    unsafe {
11857        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11858        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11859    }
11860}
11861
11862/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11863///
11864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11865#[inline]
11866#[target_feature(enable = "avx512f,avx512vl")]
11867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11868#[cfg_attr(test, assert_instr(vpmovsxbd))]
11869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11870pub const fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11871    unsafe {
11872        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11873        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11874    }
11875}
11876
11877/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11878///
11879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11880#[inline]
11881#[target_feature(enable = "avx512f,avx512vl")]
11882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11883#[cfg_attr(test, assert_instr(vpmovsxbd))]
11884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11885pub const fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11886    unsafe {
11887        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11888        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11889    }
11890}
11891
11892/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11895#[inline]
11896#[target_feature(enable = "avx512f")]
11897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11898#[cfg_attr(test, assert_instr(vpmovsxbq))]
11899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11900pub const fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11901    unsafe {
11902        let a = a.as_i8x16();
11903        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11904        transmute::<i64x8, _>(simd_cast(v64))
11905    }
11906}
11907
11908/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11909///
11910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11911#[inline]
11912#[target_feature(enable = "avx512f")]
11913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11914#[cfg_attr(test, assert_instr(vpmovsxbq))]
11915#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11916pub const fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11917    unsafe {
11918        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11919        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11920    }
11921}
11922
11923/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11924///
11925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11926#[inline]
11927#[target_feature(enable = "avx512f")]
11928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11929#[cfg_attr(test, assert_instr(vpmovsxbq))]
11930#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11931pub const fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11932    unsafe {
11933        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11934        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11935    }
11936}
11937
11938/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11939///
11940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11941#[inline]
11942#[target_feature(enable = "avx512f,avx512vl")]
11943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11944#[cfg_attr(test, assert_instr(vpmovsxbq))]
11945#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11946pub const fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11947    unsafe {
11948        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11949        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11950    }
11951}
11952
11953/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11954///
11955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11956#[inline]
11957#[target_feature(enable = "avx512f,avx512vl")]
11958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11959#[cfg_attr(test, assert_instr(vpmovsxbq))]
11960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11961pub const fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11962    unsafe {
11963        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11964        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11965    }
11966}
11967
11968/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11969///
11970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11971#[inline]
11972#[target_feature(enable = "avx512f,avx512vl")]
11973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11974#[cfg_attr(test, assert_instr(vpmovsxbq))]
11975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11976pub const fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11977    unsafe {
11978        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11979        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11980    }
11981}
11982
11983/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11984///
11985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11986#[inline]
11987#[target_feature(enable = "avx512f,avx512vl")]
11988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11989#[cfg_attr(test, assert_instr(vpmovsxbq))]
11990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11991pub const fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11992    unsafe {
11993        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11994        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11995    }
11996}
11997
11998/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11999///
12000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
12001#[inline]
12002#[target_feature(enable = "avx512f")]
12003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12004#[cfg_attr(test, assert_instr(vpmovzxbd))]
12005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12006pub const fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
12007    unsafe {
12008        let a = a.as_u8x16();
12009        transmute::<i32x16, _>(simd_cast(a))
12010    }
12011}
12012
12013/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12014///
12015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
12016#[inline]
12017#[target_feature(enable = "avx512f")]
12018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12019#[cfg_attr(test, assert_instr(vpmovzxbd))]
12020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12021pub const fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
12022    unsafe {
12023        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
12024        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12025    }
12026}
12027
12028/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12029///
12030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
12031#[inline]
12032#[target_feature(enable = "avx512f")]
12033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12034#[cfg_attr(test, assert_instr(vpmovzxbd))]
12035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12036pub const fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
12037    unsafe {
12038        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
12039        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12040    }
12041}
12042
12043/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12044///
12045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
12046#[inline]
12047#[target_feature(enable = "avx512f,avx512vl")]
12048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12049#[cfg_attr(test, assert_instr(vpmovzxbd))]
12050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12051pub const fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12052    unsafe {
12053        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
12054        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12055    }
12056}
12057
12058/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12059///
12060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
12061#[inline]
12062#[target_feature(enable = "avx512f,avx512vl")]
12063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12064#[cfg_attr(test, assert_instr(vpmovzxbd))]
12065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12066pub const fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
12067    unsafe {
12068        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
12069        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12070    }
12071}
12072
12073/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12074///
12075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
12076#[inline]
12077#[target_feature(enable = "avx512f,avx512vl")]
12078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12079#[cfg_attr(test, assert_instr(vpmovzxbd))]
12080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12081pub const fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082    unsafe {
12083        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
12084        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12085    }
12086}
12087
12088/// Zero extend packed unsigned 8-bit integers in th elow 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12094#[cfg_attr(test, assert_instr(vpmovzxbd))]
12095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12096pub const fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
12097    unsafe {
12098        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
12099        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12100    }
12101}
12102
12103/// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst.
12104///
12105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
12106#[inline]
12107#[target_feature(enable = "avx512f")]
12108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12109#[cfg_attr(test, assert_instr(vpmovzxbq))]
12110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12111pub const fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
12112    unsafe {
12113        let a = a.as_u8x16();
12114        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
12115        transmute::<i64x8, _>(simd_cast(v64))
12116    }
12117}
12118
12119/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12120///
12121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
12122#[inline]
12123#[target_feature(enable = "avx512f")]
12124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12125#[cfg_attr(test, assert_instr(vpmovzxbq))]
12126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12127pub const fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12128    unsafe {
12129        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
12130        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12131    }
12132}
12133
12134/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12135///
12136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
12137#[inline]
12138#[target_feature(enable = "avx512f")]
12139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12140#[cfg_attr(test, assert_instr(vpmovzxbq))]
12141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12142pub const fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
12143    unsafe {
12144        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
12145        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12146    }
12147}
12148
12149/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12150///
12151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
12152#[inline]
12153#[target_feature(enable = "avx512f,avx512vl")]
12154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12155#[cfg_attr(test, assert_instr(vpmovzxbq))]
12156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12157pub const fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12158    unsafe {
12159        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
12160        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12161    }
12162}
12163
12164/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12165///
12166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
12167#[inline]
12168#[target_feature(enable = "avx512f,avx512vl")]
12169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12170#[cfg_attr(test, assert_instr(vpmovzxbq))]
12171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12172pub const fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
12173    unsafe {
12174        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
12175        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12176    }
12177}
12178
12179/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12180///
12181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
12182#[inline]
12183#[target_feature(enable = "avx512f,avx512vl")]
12184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12185#[cfg_attr(test, assert_instr(vpmovzxbq))]
12186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12187pub const fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12188    unsafe {
12189        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
12190        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12191    }
12192}
12193
12194/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12195///
12196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
12197#[inline]
12198#[target_feature(enable = "avx512f,avx512vl")]
12199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12200#[cfg_attr(test, assert_instr(vpmovzxbq))]
12201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12202pub const fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
12203    unsafe {
12204        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
12205        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12206    }
12207}
12208
12209/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12210///
12211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
12212#[inline]
12213#[target_feature(enable = "avx512f")]
12214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12215#[cfg_attr(test, assert_instr(vpmovsxwd))]
12216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12217pub const fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
12218    unsafe {
12219        let a = a.as_i16x16();
12220        transmute::<i32x16, _>(simd_cast(a))
12221    }
12222}
12223
12224/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12225///
12226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
12227#[inline]
12228#[target_feature(enable = "avx512f")]
12229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12230#[cfg_attr(test, assert_instr(vpmovsxwd))]
12231#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12232pub const fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12233    unsafe {
12234        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
12235        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12236    }
12237}
12238
12239/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12240///
12241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
12242#[inline]
12243#[target_feature(enable = "avx512f")]
12244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12245#[cfg_attr(test, assert_instr(vpmovsxwd))]
12246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12247pub const fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12248    unsafe {
12249        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
12250        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12251    }
12252}
12253
12254/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12255///
12256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
12257#[inline]
12258#[target_feature(enable = "avx512f,avx512vl")]
12259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12260#[cfg_attr(test, assert_instr(vpmovsxwd))]
12261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12262pub const fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12263    unsafe {
12264        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
12265        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12266    }
12267}
12268
12269/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12270///
12271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
12272#[inline]
12273#[target_feature(enable = "avx512f,avx512vl")]
12274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12275#[cfg_attr(test, assert_instr(vpmovsxwd))]
12276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12277pub const fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12278    unsafe {
12279        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
12280        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12281    }
12282}
12283
12284/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovsxwd))]
12291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12292pub const fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12293    unsafe {
12294        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12295        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12296    }
12297}
12298
12299/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12300///
12301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
12302#[inline]
12303#[target_feature(enable = "avx512f,avx512vl")]
12304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12305#[cfg_attr(test, assert_instr(vpmovsxwd))]
12306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12307pub const fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12308    unsafe {
12309        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12310        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12311    }
12312}
12313
12314/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12315///
12316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12317#[inline]
12318#[target_feature(enable = "avx512f")]
12319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12320#[cfg_attr(test, assert_instr(vpmovsxwq))]
12321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12322pub const fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12323    unsafe {
12324        let a = a.as_i16x8();
12325        transmute::<i64x8, _>(simd_cast(a))
12326    }
12327}
12328
12329/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12330///
12331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12332#[inline]
12333#[target_feature(enable = "avx512f")]
12334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12335#[cfg_attr(test, assert_instr(vpmovsxwq))]
12336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12337pub const fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12338    unsafe {
12339        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12340        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12341    }
12342}
12343
12344/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12345///
12346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12347#[inline]
12348#[target_feature(enable = "avx512f")]
12349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12350#[cfg_attr(test, assert_instr(vpmovsxwq))]
12351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12352pub const fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12353    unsafe {
12354        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12355        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12356    }
12357}
12358
12359/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12360///
12361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12362#[inline]
12363#[target_feature(enable = "avx512f,avx512vl")]
12364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12365#[cfg_attr(test, assert_instr(vpmovsxwq))]
12366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12367pub const fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12368    unsafe {
12369        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12370        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12371    }
12372}
12373
12374/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12377#[inline]
12378#[target_feature(enable = "avx512f,avx512vl")]
12379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12380#[cfg_attr(test, assert_instr(vpmovsxwq))]
12381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12382pub const fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12383    unsafe {
12384        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12385        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12386    }
12387}
12388
12389/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12390///
12391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12392#[inline]
12393#[target_feature(enable = "avx512f,avx512vl")]
12394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12395#[cfg_attr(test, assert_instr(vpmovsxwq))]
12396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12397pub const fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12398    unsafe {
12399        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12400        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12401    }
12402}
12403
12404/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12405///
12406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12407#[inline]
12408#[target_feature(enable = "avx512f,avx512vl")]
12409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12410#[cfg_attr(test, assert_instr(vpmovsxwq))]
12411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12412pub const fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12413    unsafe {
12414        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12415        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12416    }
12417}
12418
12419/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12420///
12421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12422#[inline]
12423#[target_feature(enable = "avx512f")]
12424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12425#[cfg_attr(test, assert_instr(vpmovzxwd))]
12426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12427pub const fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12428    unsafe {
12429        let a = a.as_u16x16();
12430        transmute::<i32x16, _>(simd_cast(a))
12431    }
12432}
12433
12434/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12435///
12436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12437#[inline]
12438#[target_feature(enable = "avx512f")]
12439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12440#[cfg_attr(test, assert_instr(vpmovzxwd))]
12441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12442pub const fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12443    unsafe {
12444        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12445        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12446    }
12447}
12448
12449/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12450///
12451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12452#[inline]
12453#[target_feature(enable = "avx512f")]
12454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12455#[cfg_attr(test, assert_instr(vpmovzxwd))]
12456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12457pub const fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12458    unsafe {
12459        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12460        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12461    }
12462}
12463
12464/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12465///
12466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12467#[inline]
12468#[target_feature(enable = "avx512f,avx512vl")]
12469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12470#[cfg_attr(test, assert_instr(vpmovzxwd))]
12471#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12472pub const fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12473    unsafe {
12474        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12475        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12476    }
12477}
12478
12479/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12480///
12481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12482#[inline]
12483#[target_feature(enable = "avx512f,avx512vl")]
12484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12485#[cfg_attr(test, assert_instr(vpmovzxwd))]
12486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12487pub const fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12488    unsafe {
12489        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12490        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12491    }
12492}
12493
12494/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12497#[inline]
12498#[target_feature(enable = "avx512f,avx512vl")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vpmovzxwd))]
12501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12502pub const fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12503    unsafe {
12504        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12505        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12506    }
12507}
12508
12509/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12510///
12511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12512#[inline]
12513#[target_feature(enable = "avx512f,avx512vl")]
12514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12515#[cfg_attr(test, assert_instr(vpmovzxwd))]
12516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12517pub const fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12518    unsafe {
12519        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12520        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12521    }
12522}
12523
12524/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12525///
12526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12527#[inline]
12528#[target_feature(enable = "avx512f")]
12529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12530#[cfg_attr(test, assert_instr(vpmovzxwq))]
12531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12532pub const fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12533    unsafe {
12534        let a = a.as_u16x8();
12535        transmute::<i64x8, _>(simd_cast(a))
12536    }
12537}
12538
12539/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12540///
12541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12542#[inline]
12543#[target_feature(enable = "avx512f")]
12544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12545#[cfg_attr(test, assert_instr(vpmovzxwq))]
12546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12547pub const fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12548    unsafe {
12549        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12550        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12551    }
12552}
12553
12554/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12555///
12556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12557#[inline]
12558#[target_feature(enable = "avx512f")]
12559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12560#[cfg_attr(test, assert_instr(vpmovzxwq))]
12561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12562pub const fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12563    unsafe {
12564        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12565        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12566    }
12567}
12568
12569/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12570///
12571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12572#[inline]
12573#[target_feature(enable = "avx512f,avx512vl")]
12574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12575#[cfg_attr(test, assert_instr(vpmovzxwq))]
12576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12577pub const fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12578    unsafe {
12579        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12580        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12581    }
12582}
12583
12584/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12585///
12586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12587#[inline]
12588#[target_feature(enable = "avx512f,avx512vl")]
12589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12590#[cfg_attr(test, assert_instr(vpmovzxwq))]
12591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12592pub const fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12593    unsafe {
12594        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12595        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12596    }
12597}
12598
12599/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12600///
12601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12602#[inline]
12603#[target_feature(enable = "avx512f,avx512vl")]
12604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12605#[cfg_attr(test, assert_instr(vpmovzxwq))]
12606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12607pub const fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12608    unsafe {
12609        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12610        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12611    }
12612}
12613
12614/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12615///
12616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12617#[inline]
12618#[target_feature(enable = "avx512f,avx512vl")]
12619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12620#[cfg_attr(test, assert_instr(vpmovzxwq))]
12621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12622pub const fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12623    unsafe {
12624        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12625        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12626    }
12627}
12628
12629/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12630///
12631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12632#[inline]
12633#[target_feature(enable = "avx512f")]
12634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12635#[cfg_attr(test, assert_instr(vpmovsxdq))]
12636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12637pub const fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12638    unsafe {
12639        let a = a.as_i32x8();
12640        transmute::<i64x8, _>(simd_cast(a))
12641    }
12642}
12643
12644/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12645///
12646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12647#[inline]
12648#[target_feature(enable = "avx512f")]
12649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12650#[cfg_attr(test, assert_instr(vpmovsxdq))]
12651#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12652pub const fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12653    unsafe {
12654        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12655        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12656    }
12657}
12658
12659/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12660///
12661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12662#[inline]
12663#[target_feature(enable = "avx512f")]
12664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12665#[cfg_attr(test, assert_instr(vpmovsxdq))]
12666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12667pub const fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12668    unsafe {
12669        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12670        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12671    }
12672}
12673
12674/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12675///
12676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12677#[inline]
12678#[target_feature(enable = "avx512f,avx512vl")]
12679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12680#[cfg_attr(test, assert_instr(vpmovsxdq))]
12681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12682pub const fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12683    unsafe {
12684        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12685        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12686    }
12687}
12688
12689/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12690///
12691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12692#[inline]
12693#[target_feature(enable = "avx512f,avx512vl")]
12694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12695#[cfg_attr(test, assert_instr(vpmovsxdq))]
12696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12697pub const fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12698    unsafe {
12699        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12700        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12701    }
12702}
12703
12704/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12707#[inline]
12708#[target_feature(enable = "avx512f,avx512vl")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vpmovsxdq))]
12711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12712pub const fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12713    unsafe {
12714        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12715        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12716    }
12717}
12718
12719/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12720///
12721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12722#[inline]
12723#[target_feature(enable = "avx512f,avx512vl")]
12724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12725#[cfg_attr(test, assert_instr(vpmovsxdq))]
12726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12727pub const fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12728    unsafe {
12729        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12730        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12731    }
12732}
12733
12734/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12735///
12736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12737#[inline]
12738#[target_feature(enable = "avx512f")]
12739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12740#[cfg_attr(test, assert_instr(vpmovzxdq))]
12741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12742pub const fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12743    unsafe {
12744        let a = a.as_u32x8();
12745        transmute::<i64x8, _>(simd_cast(a))
12746    }
12747}
12748
12749/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12750///
12751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12752#[inline]
12753#[target_feature(enable = "avx512f")]
12754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12755#[cfg_attr(test, assert_instr(vpmovzxdq))]
12756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12757pub const fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12758    unsafe {
12759        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12760        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12761    }
12762}
12763
12764/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12765///
12766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12767#[inline]
12768#[target_feature(enable = "avx512f")]
12769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12770#[cfg_attr(test, assert_instr(vpmovzxdq))]
12771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12772pub const fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12773    unsafe {
12774        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12775        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12776    }
12777}
12778
12779/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12780///
12781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12782#[inline]
12783#[target_feature(enable = "avx512f,avx512vl")]
12784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12785#[cfg_attr(test, assert_instr(vpmovzxdq))]
12786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12787pub const fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12788    unsafe {
12789        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12790        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12791    }
12792}
12793
12794/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12795///
12796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12797#[inline]
12798#[target_feature(enable = "avx512f,avx512vl")]
12799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12800#[cfg_attr(test, assert_instr(vpmovzxdq))]
12801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12802pub const fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12803    unsafe {
12804        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12805        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12806    }
12807}
12808
12809/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12810///
12811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12812#[inline]
12813#[target_feature(enable = "avx512f,avx512vl")]
12814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12815#[cfg_attr(test, assert_instr(vpmovzxdq))]
12816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12817pub const fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12818    unsafe {
12819        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12820        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12821    }
12822}
12823
12824/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12825///
12826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12827#[inline]
12828#[target_feature(enable = "avx512f,avx512vl")]
12829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12830#[cfg_attr(test, assert_instr(vpmovzxdq))]
12831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12832pub const fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12833    unsafe {
12834        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12835        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12836    }
12837}
12838
12839/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12840///
12841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12842#[inline]
12843#[target_feature(enable = "avx512f")]
12844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12845#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12847pub const fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12848    unsafe {
12849        let a = a.as_i32x16();
12850        transmute::<f32x16, _>(simd_cast(a))
12851    }
12852}
12853
12854/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12855///
12856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12857#[inline]
12858#[target_feature(enable = "avx512f")]
12859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12860#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12862pub const fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12863    unsafe {
12864        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12865        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12866    }
12867}
12868
12869/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12870///
12871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12872#[inline]
12873#[target_feature(enable = "avx512f")]
12874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12875#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12877pub const fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12878    unsafe {
12879        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12880        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12881    }
12882}
12883
12884/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12885///
12886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12887#[inline]
12888#[target_feature(enable = "avx512f,avx512vl")]
12889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12890#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12892pub const fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12893    unsafe {
12894        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12895        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12896    }
12897}
12898
12899/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12900///
12901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12902#[inline]
12903#[target_feature(enable = "avx512f,avx512vl")]
12904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12905#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12907pub const fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12908    unsafe {
12909        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12910        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12911    }
12912}
12913
12914/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12915///
12916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12917#[inline]
12918#[target_feature(enable = "avx512f,avx512vl")]
12919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12920#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12922pub const fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12923    unsafe {
12924        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12925        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12926    }
12927}
12928
12929/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12930///
12931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12932#[inline]
12933#[target_feature(enable = "avx512f,avx512vl")]
12934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12935#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12937pub const fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12938    unsafe {
12939        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12940        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12941    }
12942}
12943
12944/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12945///
12946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12947#[inline]
12948#[target_feature(enable = "avx512f")]
12949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12950#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12952pub const fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12953    unsafe {
12954        let a = a.as_i32x8();
12955        transmute::<f64x8, _>(simd_cast(a))
12956    }
12957}
12958
12959/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12962#[inline]
12963#[target_feature(enable = "avx512f")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12967pub const fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12968    unsafe {
12969        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12970        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12971    }
12972}
12973
12974/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12975///
12976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12977#[inline]
12978#[target_feature(enable = "avx512f")]
12979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12980#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12982pub const fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12983    unsafe {
12984        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12985        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12986    }
12987}
12988
12989/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12990///
12991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12992#[inline]
12993#[target_feature(enable = "avx512f,avx512vl")]
12994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12995#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12997pub const fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12998    unsafe {
12999        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
13000        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
13001    }
13002}
13003
13004/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13005///
13006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
13007#[inline]
13008#[target_feature(enable = "avx512f,avx512vl")]
13009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13010#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13012pub const fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
13013    unsafe {
13014        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
13015        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
13016    }
13017}
13018
13019/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13020///
13021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
13022#[inline]
13023#[target_feature(enable = "avx512f,avx512vl")]
13024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13025#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13026#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13027pub const fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13028    unsafe {
13029        let convert = _mm_cvtepi32_pd(a).as_f64x2();
13030        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
13031    }
13032}
13033
13034/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
13037#[inline]
13038#[target_feature(enable = "avx512f,avx512vl")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13042pub const fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
13043    unsafe {
13044        let convert = _mm_cvtepi32_pd(a).as_f64x2();
13045        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
13046    }
13047}
13048
13049/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
13050///
13051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
13052#[inline]
13053#[target_feature(enable = "avx512f")]
13054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13055#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13057pub const fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
13058    unsafe {
13059        let a = a.as_u32x16();
13060        transmute::<f32x16, _>(simd_cast(a))
13061    }
13062}
13063
13064/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13065///
13066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
13067#[inline]
13068#[target_feature(enable = "avx512f")]
13069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13070#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13071#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13072pub const fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
13073    unsafe {
13074        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
13075        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
13076    }
13077}
13078
13079/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13080///
13081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
13082#[inline]
13083#[target_feature(enable = "avx512f")]
13084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13085#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13086#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13087pub const fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
13088    unsafe {
13089        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
13090        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
13091    }
13092}
13093
13094/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13095///
13096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
13097#[inline]
13098#[target_feature(enable = "avx512f")]
13099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13100#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13102pub const fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
13103    unsafe {
13104        let a = a.as_u32x8();
13105        transmute::<f64x8, _>(simd_cast(a))
13106    }
13107}
13108
13109/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
13112#[inline]
13113#[target_feature(enable = "avx512f")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13117pub const fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
13118    unsafe {
13119        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
13120        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
13121    }
13122}
13123
13124/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13125///
13126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
13127#[inline]
13128#[target_feature(enable = "avx512f")]
13129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13130#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13131#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13132pub const fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
13133    unsafe {
13134        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
13135        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
13136    }
13137}
13138
13139/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13140///
13141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
13142#[inline]
13143#[target_feature(enable = "avx512f,avx512vl")]
13144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13145#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13147pub const fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
13148    unsafe {
13149        let a = a.as_u32x4();
13150        transmute::<f64x4, _>(simd_cast(a))
13151    }
13152}
13153
13154/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13155///
13156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
13157#[inline]
13158#[target_feature(enable = "avx512f,avx512vl")]
13159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13160#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13162pub const fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
13163    unsafe {
13164        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
13165        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
13166    }
13167}
13168
13169/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13170///
13171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
13172#[inline]
13173#[target_feature(enable = "avx512f,avx512vl")]
13174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13175#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13177pub const fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
13178    unsafe {
13179        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
13180        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
13181    }
13182}
13183
13184/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13190#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13192pub const fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
13193    unsafe {
13194        let a = a.as_u32x4();
13195        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
13196        transmute::<f64x2, _>(simd_cast(u64))
13197    }
13198}
13199
13200/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13201///
13202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
13203#[inline]
13204#[target_feature(enable = "avx512f,avx512vl")]
13205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13206#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13208pub const fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13209    unsafe {
13210        let convert = _mm_cvtepu32_pd(a).as_f64x2();
13211        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
13212    }
13213}
13214
13215/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13216///
13217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
13218#[inline]
13219#[target_feature(enable = "avx512f,avx512vl")]
13220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13221#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13223pub const fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
13224    unsafe {
13225        let convert = _mm_cvtepu32_pd(a).as_f64x2();
13226        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
13227    }
13228}
13229
13230/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13231///
13232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
13233#[inline]
13234#[target_feature(enable = "avx512f")]
13235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13236#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13238pub const fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
13239    unsafe {
13240        let v2 = v2.as_i32x16();
13241        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13242        transmute::<f64x8, _>(simd_cast(v256))
13243    }
13244}
13245
13246/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13247///
13248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
13249#[inline]
13250#[target_feature(enable = "avx512f")]
13251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13252#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13254pub const fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13255    unsafe {
13256        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
13257        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
13258    }
13259}
13260
13261/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13262///
13263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
13264#[inline]
13265#[target_feature(enable = "avx512f")]
13266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13267#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13269pub const fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
13270    unsafe {
13271        let v2 = v2.as_u32x16();
13272        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13273        transmute::<f64x8, _>(simd_cast(v256))
13274    }
13275}
13276
13277/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13278///
13279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
13280#[inline]
13281#[target_feature(enable = "avx512f")]
13282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13283#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13285pub const fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13286    unsafe {
13287        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
13288        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
13289    }
13290}
13291
13292/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13293///
13294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
13295#[inline]
13296#[target_feature(enable = "avx512f")]
13297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13298#[cfg_attr(test, assert_instr(vpmovdw))]
13299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13300pub const fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
13301    unsafe {
13302        let a = a.as_i32x16();
13303        transmute::<i16x16, _>(simd_cast(a))
13304    }
13305}
13306
13307/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13308///
13309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
13310#[inline]
13311#[target_feature(enable = "avx512f")]
13312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13313#[cfg_attr(test, assert_instr(vpmovdw))]
13314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13315pub const fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13316    unsafe {
13317        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
13318        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
13319    }
13320}
13321
13322/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13323///
13324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
13325#[inline]
13326#[target_feature(enable = "avx512f")]
13327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13328#[cfg_attr(test, assert_instr(vpmovdw))]
13329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13330pub const fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13331    unsafe {
13332        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
13333        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
13334    }
13335}
13336
13337/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13338///
13339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
13340#[inline]
13341#[target_feature(enable = "avx512f,avx512vl")]
13342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13343#[cfg_attr(test, assert_instr(vpmovdw))]
13344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13345pub const fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
13346    unsafe {
13347        let a = a.as_i32x8();
13348        transmute::<i16x8, _>(simd_cast(a))
13349    }
13350}
13351
13352/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13353///
13354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
13355#[inline]
13356#[target_feature(enable = "avx512f,avx512vl")]
13357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13358#[cfg_attr(test, assert_instr(vpmovdw))]
13359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13360pub const fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13361    unsafe {
13362        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
13363        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13364    }
13365}
13366
13367/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
13370#[inline]
13371#[target_feature(enable = "avx512f,avx512vl")]
13372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13373#[cfg_attr(test, assert_instr(vpmovdw))]
13374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13375pub const fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13376    unsafe {
13377        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
13378        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13379    }
13380}
13381
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 4 mask bits map to the
    // 4 source lanes. The i16x8 zero argument supplies the upper half of dst.
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}
13392
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}
13403
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}
13414
13415/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13416///
13417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13418#[inline]
13419#[target_feature(enable = "avx512f")]
13420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13421#[cfg_attr(test, assert_instr(vpmovdb))]
13422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13423pub const fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13424    unsafe {
13425        let a = a.as_i32x16();
13426        transmute::<i8x16, _>(simd_cast(a))
13427    }
13428}
13429
13430/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13431///
13432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13433#[inline]
13434#[target_feature(enable = "avx512f")]
13435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13436#[cfg_attr(test, assert_instr(vpmovdb))]
13437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13438pub const fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13439    unsafe {
13440        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13441        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13442    }
13443}
13444
13445/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13446///
13447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13448#[inline]
13449#[target_feature(enable = "avx512f")]
13450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13451#[cfg_attr(test, assert_instr(vpmovdb))]
13452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13453pub const fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13454    unsafe {
13455        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13456        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13457    }
13458}
13459
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    // All-ones writemask keeps all 8 truncated lanes; the i8x16 zero argument
    // supplies the upper half of dst.
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}
13470
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}
13481
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}
13492
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 4 mask bits map to the
    // 4 source lanes. The i8x16 zero argument fills the remaining bytes of dst.
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}
13503
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}
13514
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}
13525
13526/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13527///
13528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13529#[inline]
13530#[target_feature(enable = "avx512f")]
13531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13532#[cfg_attr(test, assert_instr(vpmovqd))]
13533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13534pub const fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13535    unsafe {
13536        let a = a.as_i64x8();
13537        transmute::<i32x8, _>(simd_cast(a))
13538    }
13539}
13540
13541/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13542///
13543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13544#[inline]
13545#[target_feature(enable = "avx512f")]
13546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13547#[cfg_attr(test, assert_instr(vpmovqd))]
13548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13549pub const fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13550    unsafe {
13551        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13552        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13553    }
13554}
13555
13556/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13557///
13558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13559#[inline]
13560#[target_feature(enable = "avx512f")]
13561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13562#[cfg_attr(test, assert_instr(vpmovqd))]
13563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13564pub const fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13565    unsafe {
13566        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13567        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13568    }
13569}
13570
13571/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13572///
13573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13574#[inline]
13575#[target_feature(enable = "avx512f,avx512vl")]
13576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13577#[cfg_attr(test, assert_instr(vpmovqd))]
13578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13579pub const fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13580    unsafe {
13581        let a = a.as_i64x4();
13582        transmute::<i32x4, _>(simd_cast(a))
13583    }
13584}
13585
13586/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13587///
13588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13589#[inline]
13590#[target_feature(enable = "avx512f,avx512vl")]
13591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13592#[cfg_attr(test, assert_instr(vpmovqd))]
13593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13594pub const fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13595    unsafe {
13596        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13597        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13598    }
13599}
13600
13601/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13602///
13603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13604#[inline]
13605#[target_feature(enable = "avx512f,avx512vl")]
13606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13607#[cfg_attr(test, assert_instr(vpmovqd))]
13608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13609pub const fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13610    unsafe {
13611        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13612        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13613    }
13614}
13615
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 2 mask bits map to the
    // 2 source lanes. The i32x4 zero argument supplies the upper half of dst.
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}
13626
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}
13637
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}
13648
13649/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13650///
13651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13652#[inline]
13653#[target_feature(enable = "avx512f")]
13654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13655#[cfg_attr(test, assert_instr(vpmovqw))]
13656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13657pub const fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13658    unsafe {
13659        let a = a.as_i64x8();
13660        transmute::<i16x8, _>(simd_cast(a))
13661    }
13662}
13663
13664/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13670#[cfg_attr(test, assert_instr(vpmovqw))]
13671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13672pub const fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13673    unsafe {
13674        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13675        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13676    }
13677}
13678
13679/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13680///
13681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13682#[inline]
13683#[target_feature(enable = "avx512f")]
13684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13685#[cfg_attr(test, assert_instr(vpmovqw))]
13686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13687pub const fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13688    unsafe {
13689        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13690        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13691    }
13692}
13693
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 4 mask bits map to the
    // 4 source lanes. The i16x8 zero argument supplies the upper half of dst.
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}
13704
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}
13715
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}
13726
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 2 mask bits map to the
    // 2 source lanes. The i16x8 zero argument fills the remaining lanes of dst.
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}
13737
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}
13748
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}
13759
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
    // All-ones writemask keeps all 8 truncated lanes; the i8x16 zero argument
    // supplies the upper half of dst.
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}
13770
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
}
13781
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
}
13792
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 4 mask bits map to the
    // 4 source lanes. The i8x16 zero argument fills the remaining bytes of dst.
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}
13803
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}
13814
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}
13825
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
    // All-ones writemask keeps every lane; only the low 2 mask bits map to the
    // 2 source lanes. The i8x16 zero argument fills the remaining bytes of dst.
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}
13836
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}
13847
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Zero source: lanes whose mask bit in `k` is clear become 0.
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}
13858
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
    // Saturating (not truncating) narrow — hence the dedicated intrinsic rather
    // than simd_cast. The 16-bit all-ones writemask keeps all 16 lanes.
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
}
13869
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    // `src` supplies the lanes whose writemask bit in `k` is clear.
    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
}
13880
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
}
13891
13892/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13893///
13894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13895#[inline]
13896#[target_feature(enable = "avx512f,avx512vl")]
13897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13898#[cfg_attr(test, assert_instr(vpmovsdw))]
13899pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13900    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13901}
13902
13903/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13904///
13905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13906#[inline]
13907#[target_feature(enable = "avx512f,avx512vl")]
13908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13909#[cfg_attr(test, assert_instr(vpmovsdw))]
13910pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13911    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13912}
13913
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
}
13924
13925/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13926///
13927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13928#[inline]
13929#[target_feature(enable = "avx512f,avx512vl")]
13930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13931#[cfg_attr(test, assert_instr(vpmovsdw))]
13932pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13933    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13934}
13935
13936/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13937///
13938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13939#[inline]
13940#[target_feature(enable = "avx512f,avx512vl")]
13941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13942#[cfg_attr(test, assert_instr(vpmovsdw))]
13943pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13944    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13945}
13946
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}
13957
13958/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13959///
13960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13961#[inline]
13962#[target_feature(enable = "avx512f")]
13963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13964#[cfg_attr(test, assert_instr(vpmovsdb))]
13965pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13966    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13967}
13968
13969/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13970///
13971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13972#[inline]
13973#[target_feature(enable = "avx512f")]
13974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13975#[cfg_attr(test, assert_instr(vpmovsdb))]
13976pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13977    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13978}
13979
13980/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13981///
13982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13983#[inline]
13984#[target_feature(enable = "avx512f")]
13985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13986#[cfg_attr(test, assert_instr(vpmovsdb))]
13987pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13988    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13989}
13990
13991/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13992///
13993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13994#[inline]
13995#[target_feature(enable = "avx512f,avx512vl")]
13996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13997#[cfg_attr(test, assert_instr(vpmovsdb))]
13998pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13999    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
14000}
14001
14002/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14003///
14004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
14005#[inline]
14006#[target_feature(enable = "avx512f,avx512vl")]
14007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14008#[cfg_attr(test, assert_instr(vpmovsdb))]
14009pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14010    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
14011}
14012
14013/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14014///
14015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
14016#[inline]
14017#[target_feature(enable = "avx512f,avx512vl")]
14018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14019#[cfg_attr(test, assert_instr(vpmovsdb))]
14020pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14021    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
14022}
14023
14024/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14025///
14026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
14027#[inline]
14028#[target_feature(enable = "avx512f,avx512vl")]
14029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14030#[cfg_attr(test, assert_instr(vpmovsdb))]
14031pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
14032    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
14033}
14034
14035/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14036///
14037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
14038#[inline]
14039#[target_feature(enable = "avx512f,avx512vl")]
14040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14041#[cfg_attr(test, assert_instr(vpmovsdb))]
14042pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14043    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
14044}
14045
14046/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14047///
14048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
14049#[inline]
14050#[target_feature(enable = "avx512f,avx512vl")]
14051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14052#[cfg_attr(test, assert_instr(vpmovsdb))]
14053pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14054    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
14055}
14056
14057/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14058///
14059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
14060#[inline]
14061#[target_feature(enable = "avx512f")]
14062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14063#[cfg_attr(test, assert_instr(vpmovsqd))]
14064pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
14065    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
14066}
14067
14068/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14069///
14070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
14071#[inline]
14072#[target_feature(enable = "avx512f")]
14073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14074#[cfg_attr(test, assert_instr(vpmovsqd))]
14075pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14076    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
14077}
14078
14079/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14080///
14081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
14082#[inline]
14083#[target_feature(enable = "avx512f")]
14084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14085#[cfg_attr(test, assert_instr(vpmovsqd))]
14086pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14087    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
14088}
14089
14090/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14091///
14092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
14093#[inline]
14094#[target_feature(enable = "avx512f,avx512vl")]
14095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14096#[cfg_attr(test, assert_instr(vpmovsqd))]
14097pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
14098    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
14099}
14100
14101/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14102///
14103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
14104#[inline]
14105#[target_feature(enable = "avx512f,avx512vl")]
14106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14107#[cfg_attr(test, assert_instr(vpmovsqd))]
14108pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14109    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
14110}
14111
14112/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14113///
14114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
14115#[inline]
14116#[target_feature(enable = "avx512f,avx512vl")]
14117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14118#[cfg_attr(test, assert_instr(vpmovsqd))]
14119pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14120    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
14121}
14122
14123/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14124///
14125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
14126#[inline]
14127#[target_feature(enable = "avx512f,avx512vl")]
14128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14129#[cfg_attr(test, assert_instr(vpmovsqd))]
14130pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
14131    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
14132}
14133
14134/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14135///
14136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
14137#[inline]
14138#[target_feature(enable = "avx512f,avx512vl")]
14139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14140#[cfg_attr(test, assert_instr(vpmovsqd))]
14141pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14142    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
14143}
14144
14145/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14146///
14147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
14148#[inline]
14149#[target_feature(enable = "avx512f,avx512vl")]
14150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14151#[cfg_attr(test, assert_instr(vpmovsqd))]
14152pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14153    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
14154}
14155
14156/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14157///
14158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
14159#[inline]
14160#[target_feature(enable = "avx512f")]
14161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14162#[cfg_attr(test, assert_instr(vpmovsqw))]
14163pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
14164    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
14165}
14166
14167/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14168///
14169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
14170#[inline]
14171#[target_feature(enable = "avx512f")]
14172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14173#[cfg_attr(test, assert_instr(vpmovsqw))]
14174pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14175    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
14176}
14177
14178/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14179///
14180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
14181#[inline]
14182#[target_feature(enable = "avx512f")]
14183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14184#[cfg_attr(test, assert_instr(vpmovsqw))]
14185pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14186    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
14187}
14188
14189/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14190///
14191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
14192#[inline]
14193#[target_feature(enable = "avx512f,avx512vl")]
14194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14195#[cfg_attr(test, assert_instr(vpmovsqw))]
14196pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
14197    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
14198}
14199
14200/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14201///
14202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
14203#[inline]
14204#[target_feature(enable = "avx512f,avx512vl")]
14205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14206#[cfg_attr(test, assert_instr(vpmovsqw))]
14207pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14208    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
14209}
14210
14211/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14212///
14213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
14214#[inline]
14215#[target_feature(enable = "avx512f,avx512vl")]
14216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14217#[cfg_attr(test, assert_instr(vpmovsqw))]
14218pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14219    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
14220}
14221
14222/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14223///
14224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
14225#[inline]
14226#[target_feature(enable = "avx512f,avx512vl")]
14227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14228#[cfg_attr(test, assert_instr(vpmovsqw))]
14229pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
14230    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
14231}
14232
14233/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14234///
14235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
14236#[inline]
14237#[target_feature(enable = "avx512f,avx512vl")]
14238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14239#[cfg_attr(test, assert_instr(vpmovsqw))]
14240pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14241    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
14242}
14243
14244/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14245///
14246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
14247#[inline]
14248#[target_feature(enable = "avx512f,avx512vl")]
14249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14250#[cfg_attr(test, assert_instr(vpmovsqw))]
14251pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14252    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
14253}
14254
14255/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14256///
14257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
14258#[inline]
14259#[target_feature(enable = "avx512f")]
14260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14261#[cfg_attr(test, assert_instr(vpmovsqb))]
14262pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
14263    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
14264}
14265
14266/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14267///
14268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
14269#[inline]
14270#[target_feature(enable = "avx512f")]
14271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14272#[cfg_attr(test, assert_instr(vpmovsqb))]
14273pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14274    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
14275}
14276
14277/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14278///
14279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
14280#[inline]
14281#[target_feature(enable = "avx512f")]
14282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14283#[cfg_attr(test, assert_instr(vpmovsqb))]
14284pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14285    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
14286}
14287
14288/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14289///
14290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
14291#[inline]
14292#[target_feature(enable = "avx512f,avx512vl")]
14293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14294#[cfg_attr(test, assert_instr(vpmovsqb))]
14295pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
14296    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
14297}
14298
14299/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14300///
14301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
14302#[inline]
14303#[target_feature(enable = "avx512f,avx512vl")]
14304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14305#[cfg_attr(test, assert_instr(vpmovsqb))]
14306pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14307    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
14308}
14309
14310/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14311///
14312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
14313#[inline]
14314#[target_feature(enable = "avx512f,avx512vl")]
14315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14316#[cfg_attr(test, assert_instr(vpmovsqb))]
14317pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14318    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
14319}
14320
14321/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14322///
14323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
14324#[inline]
14325#[target_feature(enable = "avx512f,avx512vl")]
14326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14327#[cfg_attr(test, assert_instr(vpmovsqb))]
14328pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
14329    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
14330}
14331
14332/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14333///
14334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
14335#[inline]
14336#[target_feature(enable = "avx512f,avx512vl")]
14337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14338#[cfg_attr(test, assert_instr(vpmovsqb))]
14339pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14340    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
14341}
14342
14343/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14344///
14345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
14346#[inline]
14347#[target_feature(enable = "avx512f,avx512vl")]
14348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14349#[cfg_attr(test, assert_instr(vpmovsqb))]
14350pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14351    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
14352}
14353
14354/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14355///
14356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
14357#[inline]
14358#[target_feature(enable = "avx512f")]
14359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14360#[cfg_attr(test, assert_instr(vpmovusdw))]
14361pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
14362    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
14363}
14364
14365/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14366///
14367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
14368#[inline]
14369#[target_feature(enable = "avx512f")]
14370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14371#[cfg_attr(test, assert_instr(vpmovusdw))]
14372pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
14373    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
14374}
14375
14376/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14377///
14378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
14379#[inline]
14380#[target_feature(enable = "avx512f")]
14381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14382#[cfg_attr(test, assert_instr(vpmovusdw))]
14383pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
14384    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
14385}
14386
14387/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14388///
14389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
14390#[inline]
14391#[target_feature(enable = "avx512f,avx512vl")]
14392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14393#[cfg_attr(test, assert_instr(vpmovusdw))]
14394pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14395    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14396}
14397
14398/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14399///
14400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14401#[inline]
14402#[target_feature(enable = "avx512f,avx512vl")]
14403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14404#[cfg_attr(test, assert_instr(vpmovusdw))]
14405pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14406    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14407}
14408
14409/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14410///
14411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14412#[inline]
14413#[target_feature(enable = "avx512f,avx512vl")]
14414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14415#[cfg_attr(test, assert_instr(vpmovusdw))]
14416pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14417    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14418}
14419
14420/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14421///
14422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14423#[inline]
14424#[target_feature(enable = "avx512f,avx512vl")]
14425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14426#[cfg_attr(test, assert_instr(vpmovusdw))]
14427pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14428    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14429}
14430
14431/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14432///
14433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14434#[inline]
14435#[target_feature(enable = "avx512f,avx512vl")]
14436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14437#[cfg_attr(test, assert_instr(vpmovusdw))]
14438pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14439    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14440}
14441
14442/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14443///
14444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14445#[inline]
14446#[target_feature(enable = "avx512f,avx512vl")]
14447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14448#[cfg_attr(test, assert_instr(vpmovusdw))]
14449pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14450    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14451}
14452
14453/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14454///
14455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14456#[inline]
14457#[target_feature(enable = "avx512f")]
14458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14459#[cfg_attr(test, assert_instr(vpmovusdb))]
14460pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14461    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14462}
14463
14464/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14465///
14466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14467#[inline]
14468#[target_feature(enable = "avx512f")]
14469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14470#[cfg_attr(test, assert_instr(vpmovusdb))]
14471pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14472    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14473}
14474
14475/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14476///
14477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14478#[inline]
14479#[target_feature(enable = "avx512f")]
14480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14481#[cfg_attr(test, assert_instr(vpmovusdb))]
14482pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14483    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14484}
14485
14486/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14487///
14488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14489#[inline]
14490#[target_feature(enable = "avx512f,avx512vl")]
14491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14492#[cfg_attr(test, assert_instr(vpmovusdb))]
14493pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14494    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14495}
14496
14497/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14498///
14499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14500#[inline]
14501#[target_feature(enable = "avx512f,avx512vl")]
14502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14503#[cfg_attr(test, assert_instr(vpmovusdb))]
14504pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14505    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14506}
14507
14508/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14509///
14510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14511#[inline]
14512#[target_feature(enable = "avx512f,avx512vl")]
14513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14514#[cfg_attr(test, assert_instr(vpmovusdb))]
14515pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14516    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14517}
14518
14519/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14520///
14521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14522#[inline]
14523#[target_feature(enable = "avx512f,avx512vl")]
14524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14525#[cfg_attr(test, assert_instr(vpmovusdb))]
14526pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14527    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14528}
14529
14530/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14531///
14532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14533#[inline]
14534#[target_feature(enable = "avx512f,avx512vl")]
14535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14536#[cfg_attr(test, assert_instr(vpmovusdb))]
14537pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14538    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14539}
14540
14541/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14542///
14543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14544#[inline]
14545#[target_feature(enable = "avx512f,avx512vl")]
14546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14547#[cfg_attr(test, assert_instr(vpmovusdb))]
14548pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14549    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14550}
14551
14552/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14553///
14554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14555#[inline]
14556#[target_feature(enable = "avx512f")]
14557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14558#[cfg_attr(test, assert_instr(vpmovusqd))]
14559pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14560    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14561}
14562
14563/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14564///
14565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14566#[inline]
14567#[target_feature(enable = "avx512f")]
14568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14569#[cfg_attr(test, assert_instr(vpmovusqd))]
14570pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14571    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14572}
14573
14574/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14575///
14576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14577#[inline]
14578#[target_feature(enable = "avx512f")]
14579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14580#[cfg_attr(test, assert_instr(vpmovusqd))]
14581pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14582    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14583}
14584
14585/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14586///
14587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14588#[inline]
14589#[target_feature(enable = "avx512f,avx512vl")]
14590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14591#[cfg_attr(test, assert_instr(vpmovusqd))]
14592pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14593    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14594}
14595
14596/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14597///
14598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14599#[inline]
14600#[target_feature(enable = "avx512f,avx512vl")]
14601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14602#[cfg_attr(test, assert_instr(vpmovusqd))]
14603pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14604    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14605}
14606
14607/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14608///
14609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14610#[inline]
14611#[target_feature(enable = "avx512f,avx512vl")]
14612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14613#[cfg_attr(test, assert_instr(vpmovusqd))]
14614pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14615    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14616}
14617
14618/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14619///
14620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14621#[inline]
14622#[target_feature(enable = "avx512f,avx512vl")]
14623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14624#[cfg_attr(test, assert_instr(vpmovusqd))]
14625pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14626    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14627}
14628
14629/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14632#[inline]
14633#[target_feature(enable = "avx512f,avx512vl")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vpmovusqd))]
14636pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14637    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14638}
14639
14640/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14641///
14642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14643#[inline]
14644#[target_feature(enable = "avx512f,avx512vl")]
14645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14646#[cfg_attr(test, assert_instr(vpmovusqd))]
14647pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14648    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14649}
14650
14651/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14652///
14653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14654#[inline]
14655#[target_feature(enable = "avx512f")]
14656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14657#[cfg_attr(test, assert_instr(vpmovusqw))]
14658pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14659    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14660}
14661
14662/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14663///
14664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14665#[inline]
14666#[target_feature(enable = "avx512f")]
14667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14668#[cfg_attr(test, assert_instr(vpmovusqw))]
14669pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14670    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14671}
14672
14673/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14674///
14675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14676#[inline]
14677#[target_feature(enable = "avx512f")]
14678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14679#[cfg_attr(test, assert_instr(vpmovusqw))]
14680pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14681    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14682}
14683
14684/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14685///
14686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14687#[inline]
14688#[target_feature(enable = "avx512f,avx512vl")]
14689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14690#[cfg_attr(test, assert_instr(vpmovusqw))]
14691pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14692    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14693}
14694
14695/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14696///
14697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14698#[inline]
14699#[target_feature(enable = "avx512f,avx512vl")]
14700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14701#[cfg_attr(test, assert_instr(vpmovusqw))]
14702pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14703    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14704}
14705
14706/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14707///
14708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14709#[inline]
14710#[target_feature(enable = "avx512f,avx512vl")]
14711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14712#[cfg_attr(test, assert_instr(vpmovusqw))]
14713pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14714    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14715}
14716
14717/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14718///
14719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14720#[inline]
14721#[target_feature(enable = "avx512f,avx512vl")]
14722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14723#[cfg_attr(test, assert_instr(vpmovusqw))]
14724pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14725    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14726}
14727
14728/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14729///
14730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14731#[inline]
14732#[target_feature(enable = "avx512f,avx512vl")]
14733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14734#[cfg_attr(test, assert_instr(vpmovusqw))]
14735pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14736    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14737}
14738
14739/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14740///
14741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14742#[inline]
14743#[target_feature(enable = "avx512f,avx512vl")]
14744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14745#[cfg_attr(test, assert_instr(vpmovusqw))]
14746pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14747    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14748}
14749
14750/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14751///
14752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14753#[inline]
14754#[target_feature(enable = "avx512f")]
14755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14756#[cfg_attr(test, assert_instr(vpmovusqb))]
14757pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14758    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14759}
14760
14761/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14762///
14763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14764#[inline]
14765#[target_feature(enable = "avx512f")]
14766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14767#[cfg_attr(test, assert_instr(vpmovusqb))]
14768pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14769    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14770}
14771
14772/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14773///
14774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14775#[inline]
14776#[target_feature(enable = "avx512f")]
14777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14778#[cfg_attr(test, assert_instr(vpmovusqb))]
14779pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14780    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14781}
14782
14783/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14784///
14785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14786#[inline]
14787#[target_feature(enable = "avx512f,avx512vl")]
14788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14789#[cfg_attr(test, assert_instr(vpmovusqb))]
14790pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14791    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14792}
14793
14794/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14795///
14796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14797#[inline]
14798#[target_feature(enable = "avx512f,avx512vl")]
14799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14800#[cfg_attr(test, assert_instr(vpmovusqb))]
14801pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14802    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14803}
14804
14805/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14806///
14807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14808#[inline]
14809#[target_feature(enable = "avx512f,avx512vl")]
14810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14811#[cfg_attr(test, assert_instr(vpmovusqb))]
14812pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14813    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14814}
14815
14816/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14817///
14818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14819#[inline]
14820#[target_feature(enable = "avx512f,avx512vl")]
14821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14822#[cfg_attr(test, assert_instr(vpmovusqb))]
14823pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14824    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14825}
14826
14827/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14828///
14829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14830#[inline]
14831#[target_feature(enable = "avx512f,avx512vl")]
14832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14833#[cfg_attr(test, assert_instr(vpmovusqb))]
14834pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14835    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14836}
14837
14838/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14839///
14840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14841#[inline]
14842#[target_feature(enable = "avx512f,avx512vl")]
14843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14844#[cfg_attr(test, assert_instr(vpmovusqb))]
14845pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14846    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14847}
14848
14849/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14850///
14851/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14852/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14853/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14854/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14855/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14856/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14857///
14858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14859#[inline]
14860#[target_feature(enable = "avx512f")]
14861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14862#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14863#[rustc_legacy_const_generics(1)]
14864pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14865    unsafe {
14866        static_assert_rounding!(ROUNDING);
14867        let a = a.as_f32x16();
14868        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14869        transmute(r)
14870    }
14871}
14872
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        // The intrinsic performs the merge itself: lanes whose bit in `k`
        // is clear are taken from `src`.
        let r = vcvtps2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14901
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // Zero-masking: passing an all-zero merge source makes lanes with a
        // clear bit in `k` come out as 0.
        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14925
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // All 16 mask bits set: every lane is written, so the zeroed
        // "merge source" operand is never selected.
        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
14949
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        // The intrinsic performs the merge itself: lanes whose bit in `k`
        // is clear are taken from `src`.
        let r = vcvtps2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14978
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // Zero-masking: passing an all-zero merge source makes lanes with a
        // clear bit in `k` come out as 0.
        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
15002
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
    unsafe {
        // Widening conversion is exact, so only SAE (suppress-all-exceptions)
        // applies here, not a rounding mode; validate it at compile time.
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        // All 8 mask bits set: every lane is written, so the zeroed
        // "merge source" operand is never selected.
        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}
15020
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        // Only SAE applies to this exact widening conversion.
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let src = src.as_f64x8();
        // The intrinsic performs the merge itself: lanes whose bit in `k`
        // is clear are taken from `src`.
        let r = vcvtps2pd(a, src, k, SAE);
        transmute(r)
    }
}
15039
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        // Only SAE applies to this exact widening conversion.
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        // Zero-masking: passing an all-zero merge source makes lanes with a
        // clear bit in `k` come out as 0.
        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}
15057
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        // Narrowing 8 x f64 -> 8 x i32 yields a 256-bit result. All 8 mask
        // bits set: the zeroed "merge source" operand is never selected.
        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
15081
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        // The intrinsic performs the merge itself: lanes whose bit in `k`
        // is clear are taken from `src`.
        let r = vcvtpd2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}
15110
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        // Zero-masking: passing an all-zero merge source makes lanes with a
        // clear bit in `k` come out as 0.
        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
15134
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        // Narrowing 8 x f64 -> 8 x u32 yields a 256-bit result. All 8 mask
        // bits set: the zeroed "merge source" operand is never selected.
        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
15158
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_u32x8();
        // The intrinsic performs the merge itself: lanes whose bit in `k`
        // is clear are taken from `src`.
        let r = vcvtpd2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}
15187
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        // Zero-masking: passing an all-zero merge source makes lanes with a
        // clear bit in `k` come out as 0.
        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
15211
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        // Narrowing 8 x f64 -> 8 x f32 yields a 256-bit result. All 8 mask
        // bits set: the zeroed "merge source" operand is never selected.
        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
15235
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512d,
) -> __m256 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_f32x8();
        // The intrinsic performs the merge itself: lanes whose bit in `k`
        // is clear are taken from `src`.
        let r = vcvtpd2ps(a, src, k, ROUNDING);
        transmute(r)
    }
}
15264
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        // Zero-masking: passing an all-zero merge source makes lanes with a
        // clear bit in `k` come out as 0.
        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
15288
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        // Unlike the float-to-int wrappers above, this intrinsic takes no
        // merge source or mask operand — only the value and rounding mode.
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(r)
    }
}
15312
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        // The underlying intrinsic is unmasked, so the writemask merge with
        // `src` is applied afterwards via `simd_select_bitmask`.
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}
15340
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        // The underlying intrinsic is unmasked; zero-masking is applied
        // afterwards by selecting 0.0 for lanes with a clear bit in `k`.
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
15364
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        // Unmasked intrinsic — only the value and the rounding mode.
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(r)
    }
}
15388
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        // The underlying intrinsic is unmasked, so the writemask merge with
        // `src` is applied afterwards via `simd_select_bitmask`.
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}
15416
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        // Compile-time check that ROUNDING is one of the documented constants.
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        // The underlying intrinsic is unmasked; zero-masking is applied
        // afterwards by selecting 0.0 for lanes with a clear bit in `k`.
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
15440
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]    // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
    unsafe {
        // "Extended" here: both plain rounding modes and the NO_EXC variants are accepted.
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // All-ones writemask: every lane is converted, so the i16x16::ZERO
        // merge source is never selected.
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}
15468
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]    // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m512,
) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        // The writemask blend happens inside the intrinsic: masked-off lanes
        // keep the corresponding element of `src`.
        let r = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
    }
}
15501
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]    // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // Zeromask is expressed by passing an all-zero merge source: masked-off
        // lanes come out as 0.
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
    }
}
15529
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256,
) -> __m128i {
    unsafe {
        // The 256/128-bit forms take the raw VCVTPS2PH imm8, so only an
        // 8-bit-range check applies (no EVEX rounding-control encoding here).
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        // Writemask blend inside the intrinsic: masked-off lanes keep `src`.
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}
15557
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        // All-zero merge source implements the zeromask: masked-off lanes are 0.
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}
15580
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        // Only the low 4 i16 lanes of `src`/result are meaningful; the upper
        // half of the 128-bit destination is handled by the intrinsic.
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}
15604
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        // All-zero merge source implements the zeromask: masked-off lanes are 0.
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}
15627
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]    // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
    unsafe {
        // NOTE: intentionally identical to `_mm512_cvt_roundps_ph` — Intel
        // defines both names for the same operation.
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // All-ones writemask: every lane is converted; the zero merge source
        // is never selected.
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}
15655
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]    // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        // Same operation as `_mm512_mask_cvt_roundps_ph` under a second Intel name.
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        // Writemask blend inside the intrinsic: masked-off lanes keep `src`.
        let r = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
    }
}
15684
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]    // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        // Same operation as `_mm512_maskz_cvt_roundps_ph` under a second Intel name.
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        // All-zero merge source implements the zeromask: masked-off lanes are 0.
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
    }
}
15712
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        // Same operation as `_mm256_mask_cvt_roundps_ph` under a second Intel name.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        // Writemask blend inside the intrinsic: masked-off lanes keep `src`.
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}
15736
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        // All-zero merge source implements the zeromask: masked-off lanes are 0.
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}
15759
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        // Same operation as `_mm_mask_cvt_roundps_ph` under a second Intel name.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        // Writemask blend inside the intrinsic: masked-off lanes keep `src`.
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}
15783
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        // All-zero merge source implements the zeromask: masked-off lanes are 0.
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}
15806
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
    unsafe {
        // Widening is exact, so only SAE (suppress-all-exceptions) is
        // configurable — no rounding mode applies.
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        // All-ones writemask: every lane is converted; the zero merge source
        // is never selected.
        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
15824
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let src = src.as_f32x16();
        // Writemask blend inside the intrinsic: masked-off lanes keep `src`.
        let r = vcvtph2ps(a, src, k, SAE);
        transmute(r)
    }
}
15843
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        // All-zero merge source implements the zeromask: masked-off lanes are 0.0.
        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}
15861
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
    unsafe {
        // Full writemask converts all 16 lanes; _MM_FROUND_NO_EXC suppresses
        // floating-point exceptions (the widening itself is exact).
        transmute(vcvtph2ps(
            a.as_i16x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_NO_EXC,
        ))
    }
}
15879
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        // Writemask blend inside the intrinsic: masked-off lanes keep `src`.
        transmute(vcvtph2ps(
            a.as_i16x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}
15897
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    // All-zero merge source implements the zeromask: masked-off lanes are 0.0.
    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
}
15908
15909/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15910///
15911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15912#[inline]
15913#[target_feature(enable = "avx512f,avx512vl")]
15914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15915#[cfg_attr(test, assert_instr(vcvtph2ps))]
15916pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15917    unsafe {
15918        let convert = _mm256_cvtph_ps(a);
15919        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15920    }
15921}
15922
15923/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15924///
15925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15926#[inline]
15927#[target_feature(enable = "avx512f,avx512vl")]
15928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15929#[cfg_attr(test, assert_instr(vcvtph2ps))]
15930pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15931    unsafe {
15932        let convert = _mm256_cvtph_ps(a);
15933        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15934    }
15935}
15936
15937/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15943#[cfg_attr(test, assert_instr(vcvtph2ps))]
15944pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15945    unsafe {
15946        let convert = _mm_cvtph_ps(a);
15947        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15948    }
15949}
15950
15951/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15952///
15953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15954#[inline]
15955#[target_feature(enable = "avx512f,avx512vl")]
15956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15957#[cfg_attr(test, assert_instr(vcvtph2ps))]
15958pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15959    unsafe {
15960        let convert = _mm_cvtph_ps(a);
15961        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15962    }
15963}
15964
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        // Truncating conversion always rounds toward zero, so only SAE
        // (suppress-all-exceptions) is configurable.
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        // All-ones writemask: every lane is converted; the zero merge source
        // is never selected.
        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
15982
15983/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15984/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15990#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15991#[rustc_legacy_const_generics(3)]
15992pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15993    src: __m512i,
15994    k: __mmask16,
15995    a: __m512,
15996) -> __m512i {
15997    unsafe {
15998        static_assert_sae!(SAE);
15999        let a = a.as_f32x16();
16000        let src = src.as_i32x16();
16001        let r = vcvttps2dq(a, src, k, SAE);
16002        transmute(r)
16003    }
16004}
16005
16006/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16008///
16009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
16010#[inline]
16011#[target_feature(enable = "avx512f")]
16012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16013#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
16014#[rustc_legacy_const_generics(2)]
16015pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16016    unsafe {
16017        static_assert_sae!(SAE);
16018        let a = a.as_f32x16();
16019        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
16020        transmute(r)
16021    }
16022}
16023
16024/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16026///
16027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
16028#[inline]
16029#[target_feature(enable = "avx512f")]
16030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16031#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16032#[rustc_legacy_const_generics(1)]
16033pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
16034    unsafe {
16035        static_assert_sae!(SAE);
16036        let a = a.as_f32x16();
16037        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
16038        transmute(r)
16039    }
16040}
16041
16042/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16043/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16044///
16045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
16046#[inline]
16047#[target_feature(enable = "avx512f")]
16048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16049#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16050#[rustc_legacy_const_generics(3)]
16051pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
16052    src: __m512i,
16053    k: __mmask16,
16054    a: __m512,
16055) -> __m512i {
16056    unsafe {
16057        static_assert_sae!(SAE);
16058        let a = a.as_f32x16();
16059        let src = src.as_u32x16();
16060        let r = vcvttps2udq(a, src, k, SAE);
16061        transmute(r)
16062    }
16063}
16064
16065/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16066/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16067///
16068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
16069#[inline]
16070#[target_feature(enable = "avx512f")]
16071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16072#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16073#[rustc_legacy_const_generics(2)]
16074pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16075    unsafe {
16076        static_assert_sae!(SAE);
16077        let a = a.as_f32x16();
16078        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
16079        transmute(r)
16080    }
16081}
16082
16083/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
16084/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16085///
16086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
16087#[inline]
16088#[target_feature(enable = "avx512f")]
16089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16090#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16091#[rustc_legacy_const_generics(1)]
16092pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
16093    unsafe {
16094        static_assert_sae!(SAE);
16095        let a = a.as_f64x8();
16096        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
16097        transmute(r)
16098    }
16099}
16100
16101/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
16105#[inline]
16106#[target_feature(enable = "avx512f")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16109#[rustc_legacy_const_generics(3)]
16110pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
16111    src: __m256i,
16112    k: __mmask8,
16113    a: __m512d,
16114) -> __m256i {
16115    unsafe {
16116        static_assert_sae!(SAE);
16117        let a = a.as_f64x8();
16118        let src = src.as_i32x8();
16119        let r = vcvttpd2dq(a, src, k, SAE);
16120        transmute(r)
16121    }
16122}
16123
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16144///
16145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
16146#[inline]
16147#[target_feature(enable = "avx512f")]
16148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16149#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16150#[rustc_legacy_const_generics(1)]
16151pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
16152    unsafe {
16153        static_assert_sae!(SAE);
16154        let a = a.as_f64x8();
16155        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
16156        transmute(r)
16157    }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16162///
16163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
16164#[inline]
16165#[target_feature(enable = "avx512f")]
16166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16167#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16168#[rustc_legacy_const_generics(3)]
16169pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
16170    src: __m256i,
16171    k: __mmask8,
16172    a: __m512d,
16173) -> __m256i {
16174    unsafe {
16175        static_assert_sae!(SAE);
16176        let a = a.as_f64x8();
16177        let src = src.as_i32x8();
16178        let r = vcvttpd2udq(a, src, k, SAE);
16179        transmute(r)
16180    }
16181}
16182
16183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16184///
16185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
16186#[inline]
16187#[target_feature(enable = "avx512f")]
16188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16189#[cfg_attr(test, assert_instr(vcvttps2dq))]
16190pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
16191    unsafe {
16192        transmute(vcvttps2dq(
16193            a.as_f32x16(),
16194            i32x16::ZERO,
16195            0b11111111_11111111,
16196            _MM_FROUND_CUR_DIRECTION,
16197        ))
16198    }
16199}
16200
16201/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16202///
16203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
16204#[inline]
16205#[target_feature(enable = "avx512f")]
16206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16207#[cfg_attr(test, assert_instr(vcvttps2dq))]
16208pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
16209    unsafe {
16210        transmute(vcvttps2dq(
16211            a.as_f32x16(),
16212            src.as_i32x16(),
16213            k,
16214            _MM_FROUND_CUR_DIRECTION,
16215        ))
16216    }
16217}
16218
16219/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16220///
16221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
16222#[inline]
16223#[target_feature(enable = "avx512f")]
16224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16225#[cfg_attr(test, assert_instr(vcvttps2dq))]
16226pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
16227    unsafe {
16228        transmute(vcvttps2dq(
16229            a.as_f32x16(),
16230            i32x16::ZERO,
16231            k,
16232            _MM_FROUND_CUR_DIRECTION,
16233        ))
16234    }
16235}
16236
16237/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16238///
16239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
16240#[inline]
16241#[target_feature(enable = "avx512f,avx512vl")]
16242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16243#[cfg_attr(test, assert_instr(vcvttps2dq))]
16244pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
16245    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
16246}
16247
16248/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16249///
16250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
16251#[inline]
16252#[target_feature(enable = "avx512f,avx512vl")]
16253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16254#[cfg_attr(test, assert_instr(vcvttps2dq))]
16255pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
16256    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
16257}
16258
16259/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16260///
16261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
16262#[inline]
16263#[target_feature(enable = "avx512f,avx512vl")]
16264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16265#[cfg_attr(test, assert_instr(vcvttps2dq))]
16266pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
16267    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
16268}
16269
16270/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16271///
16272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
16273#[inline]
16274#[target_feature(enable = "avx512f,avx512vl")]
16275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16276#[cfg_attr(test, assert_instr(vcvttps2dq))]
16277pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
16278    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
16279}
16280
16281/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16282///
16283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
16284#[inline]
16285#[target_feature(enable = "avx512f")]
16286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16287#[cfg_attr(test, assert_instr(vcvttps2udq))]
16288pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
16289    unsafe {
16290        transmute(vcvttps2udq(
16291            a.as_f32x16(),
16292            u32x16::ZERO,
16293            0b11111111_11111111,
16294            _MM_FROUND_CUR_DIRECTION,
16295        ))
16296    }
16297}
16298
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
16316
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
16334
16335/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16336///
16337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
16338#[inline]
16339#[target_feature(enable = "avx512f,avx512vl")]
16340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16341#[cfg_attr(test, assert_instr(vcvttps2udq))]
16342pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
16343    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
16344}
16345
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
}
16356
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
}
16367
16368/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16369///
16370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
16371#[inline]
16372#[target_feature(enable = "avx512f,avx512vl")]
16373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16374#[cfg_attr(test, assert_instr(vcvttps2udq))]
16375pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
16376    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
16377}
16378
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
}
16389
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
}
16400
16401/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16402/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16403///
16404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
16405#[inline]
16406#[target_feature(enable = "avx512f")]
16407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16408#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16409#[rustc_legacy_const_generics(2)]
16410pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16411    unsafe {
16412        static_assert_sae!(SAE);
16413        let a = a.as_f64x8();
16414        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
16415        transmute(r)
16416    }
16417}
16418
16419/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16425#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16426pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16427    unsafe {
16428        transmute(vcvttpd2dq(
16429            a.as_f64x8(),
16430            i32x8::ZERO,
16431            0b11111111,
16432            _MM_FROUND_CUR_DIRECTION,
16433        ))
16434    }
16435}
16436
16437/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16438///
16439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16440#[inline]
16441#[target_feature(enable = "avx512f")]
16442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16443#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16444pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16445    unsafe {
16446        transmute(vcvttpd2dq(
16447            a.as_f64x8(),
16448            src.as_i32x8(),
16449            k,
16450            _MM_FROUND_CUR_DIRECTION,
16451        ))
16452    }
16453}
16454
16455/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16456///
16457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16458#[inline]
16459#[target_feature(enable = "avx512f")]
16460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16461#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16462pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16463    unsafe {
16464        transmute(vcvttpd2dq(
16465            a.as_f64x8(),
16466            i32x8::ZERO,
16467            k,
16468            _MM_FROUND_CUR_DIRECTION,
16469        ))
16470    }
16471}
16472
16473/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16474///
16475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16476#[inline]
16477#[target_feature(enable = "avx512f,avx512vl")]
16478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16479#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16480pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16481    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16482}
16483
16484/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16485///
16486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16487#[inline]
16488#[target_feature(enable = "avx512f,avx512vl")]
16489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16490#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16491pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16492    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16493}
16494
16495/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16496///
16497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16498#[inline]
16499#[target_feature(enable = "avx512f,avx512vl")]
16500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16501#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16502pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16503    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16504}
16505
16506/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16507///
16508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16509#[inline]
16510#[target_feature(enable = "avx512f,avx512vl")]
16511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16512#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16513pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16514    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16515}
16516
16517/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16518///
16519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16520#[inline]
16521#[target_feature(enable = "avx512f")]
16522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16523#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16524pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16525    unsafe {
16526        transmute(vcvttpd2udq(
16527            a.as_f64x8(),
16528            i32x8::ZERO,
16529            0b11111111,
16530            _MM_FROUND_CUR_DIRECTION,
16531        ))
16532    }
16533}
16534
16535/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16536///
16537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16538#[inline]
16539#[target_feature(enable = "avx512f")]
16540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16541#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16542pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16543    unsafe {
16544        transmute(vcvttpd2udq(
16545            a.as_f64x8(),
16546            src.as_i32x8(),
16547            k,
16548            _MM_FROUND_CUR_DIRECTION,
16549        ))
16550    }
16551}
16552
16553/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16554///
16555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    // SAFETY: `vcvttpd2udq` is gated by the avx512f target feature enabled
    // above; `transmute` reinterprets between equally sized vector types.
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            // Zeromask semantics: lanes whose bit in `k` is clear come out zero.
            i32x8::ZERO,
            k,
            // Use the current rounding/exception mode (no SAE override).
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
16570
16571/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16572///
16573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    // SAFETY: the 256-bit intrinsic is gated by the avx512f/avx512vl target
    // features enabled above. The all-ones mask writes every lane, so the
    // `i32x4::ZERO` merge source is never observed (extra mask bits are ignored).
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
}
16581
16582/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16583///
16584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    // SAFETY: gated by the avx512f/avx512vl target features enabled above.
    // Lanes whose bit in `k` is clear keep the corresponding `src` element.
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
}
16592
16593/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16594///
16595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    // SAFETY: gated by the avx512f/avx512vl target features enabled above.
    // Zeromask semantics: lanes whose bit in `k` is clear come out zero.
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
}
16603
16604/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16605///
16606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    // SAFETY: the 128-bit intrinsic is gated by the avx512f/avx512vl target
    // features enabled above. All-ones mask: every lane is written, so the
    // `i32x4::ZERO` merge source is never observed (extra mask bits are ignored).
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
}
16614
16615/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16616///
16617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // SAFETY: gated by the avx512f/avx512vl target features enabled above.
    // Lanes whose bit in `k` is clear keep the corresponding `src` element.
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
}
16625
16626/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16627///
16628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    // SAFETY: gated by the avx512f/avx512vl target features enabled above.
    // Zeromask semantics: lanes whose bit in `k` is clear come out zero.
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
}
16636
16637/// Returns vector of type `__m512d` with all elements set to zero.
16638///
16639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_pd() -> __m512d {
    // All-0 is a properly initialized __m512d
    // SAFETY: the all-zero bit pattern is a valid value of this vector type,
    // so `mem::zeroed` cannot produce an invalid value; the `const { .. }`
    // block forces the zeroing to be evaluated at compile time.
    unsafe { const { mem::zeroed() } }
}
16649
16650/// Returns vector of type `__m512` with all elements set to zero.
16651///
16652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_ps() -> __m512 {
    // All-0 is a properly initialized __m512
    // SAFETY: the all-zero bit pattern is a valid value of this vector type,
    // so `mem::zeroed` cannot produce an invalid value; the `const { .. }`
    // block forces the zeroing to be evaluated at compile time.
    unsafe { const { mem::zeroed() } }
}
16662
16663/// Return vector of type `__m512` with all elements set to zero.
16664///
16665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero() -> __m512 {
    // All-0 is a properly initialized __m512
    // SAFETY: the all-zero bit pattern is a valid value of this vector type,
    // so `mem::zeroed` cannot produce an invalid value; the `const { .. }`
    // block forces the zeroing to be evaluated at compile time.
    unsafe { const { mem::zeroed() } }
}
16675
16676/// Returns vector of type `__m512i` with all elements set to zero.
16677///
16678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_si512() -> __m512i {
    // All-0 is a properly initialized __m512i
    // SAFETY: the all-zero bit pattern is a valid value of this vector type,
    // so `mem::zeroed` cannot produce an invalid value; the `const { .. }`
    // block forces the zeroing to be evaluated at compile time.
    unsafe { const { mem::zeroed() } }
}
16688
16689/// Return vector of type `__m512i` with all elements set to zero.
16690///
16691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_epi32() -> __m512i {
    // All-0 is a properly initialized __m512i
    // SAFETY: the all-zero bit pattern is a valid value of this vector type,
    // so `mem::zeroed` cannot produce an invalid value; the `const { .. }`
    // block forces the zeroing to be evaluated at compile time.
    unsafe { const { mem::zeroed() } }
}
16701
16702/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16703/// order.
16704///
16705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_epi32(
    e15: i32,
    e14: i32,
    e13: i32,
    e12: i32,
    e11: i32,
    e10: i32,
    e9: i32,
    e8: i32,
    e7: i32,
    e6: i32,
    e5: i32,
    e4: i32,
    e3: i32,
    e2: i32,
    e1: i32,
    e0: i32,
) -> __m512i {
    // SAFETY: `transmute` reinterprets i32x16 as the equally sized __m512i.
    unsafe {
        // "setr" (reverse) order: the first argument (`e15`) is stored in
        // element 0, the last (`e0`) in element 15 — the opposite of
        // `_mm512_set_epi32`.
        let r = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
    }
}
16735
16736/// Set packed 8-bit integers in dst with the supplied values.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi8(
    e63: i8,
    e62: i8,
    e61: i8,
    e60: i8,
    e59: i8,
    e58: i8,
    e57: i8,
    e56: i8,
    e55: i8,
    e54: i8,
    e53: i8,
    e52: i8,
    e51: i8,
    e50: i8,
    e49: i8,
    e48: i8,
    e47: i8,
    e46: i8,
    e45: i8,
    e44: i8,
    e43: i8,
    e42: i8,
    e41: i8,
    e40: i8,
    e39: i8,
    e38: i8,
    e37: i8,
    e36: i8,
    e35: i8,
    e34: i8,
    e33: i8,
    e32: i8,
    e31: i8,
    e30: i8,
    e29: i8,
    e28: i8,
    e27: i8,
    e26: i8,
    e25: i8,
    e24: i8,
    e23: i8,
    e22: i8,
    e21: i8,
    e20: i8,
    e19: i8,
    e18: i8,
    e17: i8,
    e16: i8,
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m512i {
    // SAFETY: `transmute` reinterprets i8x64 as the equally sized __m512i.
    unsafe {
        // Arguments are declared high-to-low (`e63` first) per the Intel
        // `set` convention, but vector elements are stored low-to-high, so
        // the constructor lists them in reverse (`e0` in element 0).
        let r = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
    }
}
16819
16820/// Set packed 16-bit integers in dst with the supplied values.
16821///
16822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi16(
    e31: i16,
    e30: i16,
    e29: i16,
    e28: i16,
    e27: i16,
    e26: i16,
    e25: i16,
    e24: i16,
    e23: i16,
    e22: i16,
    e21: i16,
    e20: i16,
    e19: i16,
    e18: i16,
    e17: i16,
    e16: i16,
    e15: i16,
    e14: i16,
    e13: i16,
    e12: i16,
    e11: i16,
    e10: i16,
    e9: i16,
    e8: i16,
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m512i {
    // SAFETY: `transmute` reinterprets i16x32 as the equally sized __m512i.
    unsafe {
        // Arguments are declared high-to-low (`e31` first) per the Intel
        // `set` convention, but vector elements are stored low-to-high, so
        // the constructor lists them in reverse (`e0` in element 0).
        let r = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
    }
}
16869
16870/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16871///
16872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    // Repeat the 4-element pattern (d, c, b, a) across all four 128-bit
    // lanes by expanding it into a full 16-element `set` call.
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
16880
16881/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16882///
16883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    // Repeat the 4-element pattern (d, c, b, a) across all four 128-bit
    // lanes by expanding it into a full 16-element `set` call.
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
16891
16892/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16893///
16894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    // Repeat the 4-element pattern (d, c, b, a) twice — eight f64 elements
    // total — via the full 8-element `set` call.
    _mm512_set_pd(d, c, b, a, d, c, b, a)
}
16902
16903/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16904///
16905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    // "setr4": same repeated pattern as `_mm512_set4_epi32` but in reverse
    // order, expressed by passing (a, b, c, d) to the `set` constructor.
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
16913
16914/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16915///
16916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    // "setr4": same repeated pattern as `_mm512_set4_ps` but in reverse
    // order, expressed by passing (a, b, c, d) to the `set` constructor.
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
16924
16925/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16926///
16927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    // "setr4": same repeated pattern as `_mm512_set4_pd` but in reverse
    // order, expressed by passing (a, b, c, d) to the `set` constructor.
    _mm512_set_pd(a, b, c, d, a, b, c, d)
}
16935
16936/// Set packed 64-bit integers in dst with the supplied values.
16937///
16938/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi64(
    e0: i64,
    e1: i64,
    e2: i64,
    e3: i64,
    e4: i64,
    e5: i64,
    e6: i64,
    e7: i64,
) -> __m512i {
    // `set` order: the first declared argument (`e0`) is the most
    // significant element. Forwarding the arguments reversed to the
    // `setr` variant places `e7` in element 0 and `e0` in element 7.
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
}
16955
16956/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16957///
16958/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_epi64(
    e0: i64,
    e1: i64,
    e2: i64,
    e3: i64,
    e4: i64,
    e5: i64,
    e6: i64,
    e7: i64,
) -> __m512i {
    // SAFETY: `transmute` reinterprets i64x8 as the equally sized __m512i.
    unsafe {
        // "setr" order: the first argument (`e0`) is stored in element 0.
        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
    }
}
16978
16979/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16980///
16981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    let zero = f64x8::ZERO;
    // -1 (all bits set) enables the gather for all 8 lanes.
    let neg_one = -1;
    // The intrinsic takes a byte base pointer; addressing is
    // base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
16999
17000/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17001///
17002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    // Only lanes whose bit in `mask` is set are gathered; the rest keep `src`.
    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}
17021
17022/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17023///
17024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    let zero = f64x8::ZERO;
    // -1 (all bits set) enables the gather for all 8 lanes.
    let neg_one = -1;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17042
17043/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17044///
17045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // Only lanes whose bit in `mask` is set are gathered; the rest keep `src`.
    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}
17064
17065/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17066///
17067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    // Eight 64-bit indices yield eight f32 results (a 256-bit vector).
    let zero = f32x8::ZERO;
    // -1 (all bits set) enables the gather for all 8 lanes.
    let neg_one = -1;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17082
17083/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17084///
17085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
    src: __m256,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f32,
) -> __m256 {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // Only lanes whose bit in `mask` is set are gathered; the rest keep `src`.
    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}
17104
17105/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17106///
17107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    let zero = f32x16::ZERO;
    // -1 (all bits set) enables the gather for all 16 lanes.
    let neg_one = -1;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17122
17123/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
    src: __m512,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const f32,
) -> __m512 {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    // 16 lanes, hence the 16-bit mask cast. Only lanes whose bit in `mask`
    // is set are gathered; the rest keep `src`.
    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
}
17144
17145/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17146///
17147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    let zero = i32x16::ZERO;
    // -1 (all bits set) enables the gather for all 16 lanes.
    let neg_one = -1;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17165
17166/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
    src: __m512i,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    // 16 lanes, hence the 16-bit mask cast.
    let mask = mask as i16;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    // Only lanes whose bit in `mask` is set are gathered; the rest keep `src`.
    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
}
17188
17189/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17190///
17191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    let zero = i64x8::ZERO;
    // -1 (all bits set) enables the gather for all 8 lanes.
    let neg_one = -1;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17209
17210/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17211///
17212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
    src: __m512i,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    // 8 lanes, hence the 8-bit mask cast.
    let mask = mask as i8;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    // Only lanes whose bit in `mask` is set are gathered; the rest keep `src`.
    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
}
17232
17233/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17234///
17235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i64,
) -> __m512i {
    // SCALE is validated at compile time (must be 1, 2, 4 or 8).
    static_assert_imm8_scale!(SCALE);
    // Merge source is never observed: the all-ones mask gathers every lane.
    let zero = i64x8::ZERO;
    // -1 (all bits set) enables the gather for all 8 lanes.
    let neg_one = -1;
    // Byte base pointer; addressing is base + offset * SCALE per lane.
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    // Caller contract (this is an `unsafe fn`): every gathered address must
    // be valid for reads — see the Intel documentation linked above.
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}
17253
17254/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17255///
17256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
17257#[inline]
17258#[target_feature(enable = "avx512f")]
17259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17260#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17261#[rustc_legacy_const_generics(4)]
17262pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
17263    src: __m512i,
17264    mask: __mmask8,
17265    offsets: __m512i,
17266    slice: *const i64,
17267) -> __m512i {
17268    static_assert_imm8_scale!(SCALE);
17269    let src = src.as_i64x8();
17270    let mask = mask as i8;
17271    let slice = slice as *const i8;
17272    let offsets = offsets.as_i64x8();
17273    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
17274    transmute(r)
17275}
17276
17277/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17278///
17279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
17280#[inline]
17281#[target_feature(enable = "avx512f")]
17282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17283#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17284#[rustc_legacy_const_generics(2)]
17285pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
17286    offsets: __m512i,
17287    slice: *const i32,
17288) -> __m256i {
17289    static_assert_imm8_scale!(SCALE);
17290    let zeros = i32x8::ZERO;
17291    let neg_one = -1;
17292    let slice = slice as *const i8;
17293    let offsets = offsets.as_i64x8();
17294    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
17295    transmute(r)
17296}
17297
17298/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17299///
17300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
17301#[inline]
17302#[target_feature(enable = "avx512f")]
17303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17304#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17305#[rustc_legacy_const_generics(4)]
17306pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
17307    src: __m256i,
17308    mask: __mmask8,
17309    offsets: __m512i,
17310    slice: *const i32,
17311) -> __m256i {
17312    static_assert_imm8_scale!(SCALE);
17313    let src = src.as_i32x8();
17314    let mask = mask as i8;
17315    let slice = slice as *const i8;
17316    let offsets = offsets.as_i64x8();
17317    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
17318    transmute(r)
17319}
17320
17321/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17322///
17323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
17324#[inline]
17325#[target_feature(enable = "avx512f")]
17326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17327#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17328#[rustc_legacy_const_generics(3)]
17329pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
17330    slice: *mut f64,
17331    offsets: __m256i,
17332    src: __m512d,
17333) {
17334    static_assert_imm8_scale!(SCALE);
17335    let src = src.as_f64x8();
17336    let neg_one = -1;
17337    let slice = slice as *mut i8;
17338    let offsets = offsets.as_i32x8();
17339    vscatterdpd(slice, neg_one, offsets, src, SCALE);
17340}
17341
17342/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17343///
17344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
17345#[inline]
17346#[target_feature(enable = "avx512f")]
17347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17348#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17349#[rustc_legacy_const_generics(4)]
17350pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
17351    slice: *mut f64,
17352    mask: __mmask8,
17353    offsets: __m256i,
17354    src: __m512d,
17355) {
17356    static_assert_imm8_scale!(SCALE);
17357    let src = src.as_f64x8();
17358    let slice = slice as *mut i8;
17359    let offsets = offsets.as_i32x8();
17360    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
17361}
17362
17363/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17364///
17365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
17366#[inline]
17367#[target_feature(enable = "avx512f")]
17368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17369#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17370#[rustc_legacy_const_generics(3)]
17371pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
17372    slice: *mut f64,
17373    offsets: __m512i,
17374    src: __m512d,
17375) {
17376    static_assert_imm8_scale!(SCALE);
17377    let src = src.as_f64x8();
17378    let neg_one = -1;
17379    let slice = slice as *mut i8;
17380    let offsets = offsets.as_i64x8();
17381    vscatterqpd(slice, neg_one, offsets, src, SCALE);
17382}
17383
17384/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17385///
17386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
17387#[inline]
17388#[target_feature(enable = "avx512f")]
17389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17390#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17391#[rustc_legacy_const_generics(4)]
17392pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
17393    slice: *mut f64,
17394    mask: __mmask8,
17395    offsets: __m512i,
17396    src: __m512d,
17397) {
17398    static_assert_imm8_scale!(SCALE);
17399    let src = src.as_f64x8();
17400    let slice = slice as *mut i8;
17401    let offsets = offsets.as_i64x8();
17402    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
17403}
17404
17405/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17406///
17407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
17408#[inline]
17409#[target_feature(enable = "avx512f")]
17410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17411#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17412#[rustc_legacy_const_generics(3)]
17413pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17414    slice: *mut f32,
17415    offsets: __m512i,
17416    src: __m512,
17417) {
17418    static_assert_imm8_scale!(SCALE);
17419    let src = src.as_f32x16();
17420    let neg_one = -1;
17421    let slice = slice as *mut i8;
17422    let offsets = offsets.as_i32x16();
17423    vscatterdps(slice, neg_one, offsets, src, SCALE);
17424}
17425
17426/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17427///
17428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17429#[inline]
17430#[target_feature(enable = "avx512f")]
17431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17432#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17433#[rustc_legacy_const_generics(4)]
17434pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17435    slice: *mut f32,
17436    mask: __mmask16,
17437    offsets: __m512i,
17438    src: __m512,
17439) {
17440    static_assert_imm8_scale!(SCALE);
17441    let src = src.as_f32x16();
17442    let slice = slice as *mut i8;
17443    let offsets = offsets.as_i32x16();
17444    vscatterdps(slice, mask as i16, offsets, src, SCALE);
17445}
17446
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    offsets: __m512i,
    src: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    // All-ones mask: this unmasked variant stores every element.
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
}
17467
17468/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17469///
17470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17471#[inline]
17472#[target_feature(enable = "avx512f")]
17473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17474#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17475#[rustc_legacy_const_generics(4)]
17476pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17477    slice: *mut f32,
17478    mask: __mmask8,
17479    offsets: __m512i,
17480    src: __m256,
17481) {
17482    static_assert_imm8_scale!(SCALE);
17483    let src = src.as_f32x8();
17484    let slice = slice as *mut i8;
17485    let offsets = offsets.as_i64x8();
17486    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17487}
17488
17489/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17490///
17491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17492#[inline]
17493#[target_feature(enable = "avx512f")]
17494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17495#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17496#[rustc_legacy_const_generics(3)]
17497pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17498    slice: *mut i64,
17499    offsets: __m256i,
17500    src: __m512i,
17501) {
17502    static_assert_imm8_scale!(SCALE);
17503    let src = src.as_i64x8();
17504    let neg_one = -1;
17505    let slice = slice as *mut i8;
17506    let offsets = offsets.as_i32x8();
17507    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17508}
17509
17510/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17511///
17512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17513#[inline]
17514#[target_feature(enable = "avx512f")]
17515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17516#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17517#[rustc_legacy_const_generics(4)]
17518pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17519    slice: *mut i64,
17520    mask: __mmask8,
17521    offsets: __m256i,
17522    src: __m512i,
17523) {
17524    static_assert_imm8_scale!(SCALE);
17525    let src = src.as_i64x8();
17526    let mask = mask as i8;
17527    let slice = slice as *mut i8;
17528    let offsets = offsets.as_i32x8();
17529    vpscatterdq(slice, mask, offsets, src, SCALE);
17530}
17531
17532/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17533///
17534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17535#[inline]
17536#[target_feature(enable = "avx512f")]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17539#[rustc_legacy_const_generics(3)]
17540pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17541    slice: *mut i64,
17542    offsets: __m512i,
17543    src: __m512i,
17544) {
17545    static_assert_imm8_scale!(SCALE);
17546    let src = src.as_i64x8();
17547    let neg_one = -1;
17548    let slice = slice as *mut i8;
17549    let offsets = offsets.as_i64x8();
17550    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17551}
17552
17553/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17554///
17555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17556#[inline]
17557#[target_feature(enable = "avx512f")]
17558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17559#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17560#[rustc_legacy_const_generics(4)]
17561pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17562    slice: *mut i64,
17563    mask: __mmask8,
17564    offsets: __m512i,
17565    src: __m512i,
17566) {
17567    static_assert_imm8_scale!(SCALE);
17568    let src = src.as_i64x8();
17569    let mask = mask as i8;
17570    let slice = slice as *mut i8;
17571    let offsets = offsets.as_i64x8();
17572    vpscatterqq(slice, mask, offsets, src, SCALE);
17573}
17574
17575/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17576///
17577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17578#[inline]
17579#[target_feature(enable = "avx512f")]
17580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17581#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17582#[rustc_legacy_const_generics(3)]
17583pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17584    slice: *mut i32,
17585    offsets: __m512i,
17586    src: __m512i,
17587) {
17588    static_assert_imm8_scale!(SCALE);
17589    let src = src.as_i32x16();
17590    let neg_one = -1;
17591    let slice = slice as *mut i8;
17592    let offsets = offsets.as_i32x16();
17593    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17594}
17595
17596/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17597///
17598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17599#[inline]
17600#[target_feature(enable = "avx512f")]
17601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17602#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17603#[rustc_legacy_const_generics(4)]
17604pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17605    slice: *mut i32,
17606    mask: __mmask16,
17607    offsets: __m512i,
17608    src: __m512i,
17609) {
17610    static_assert_imm8_scale!(SCALE);
17611    let src = src.as_i32x16();
17612    let mask = mask as i16;
17613    let slice = slice as *mut i8;
17614    let offsets = offsets.as_i32x16();
17615    vpscatterdd(slice, mask, offsets, src, SCALE);
17616}
17617
17618/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17619///
17620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17621#[inline]
17622#[target_feature(enable = "avx512f")]
17623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17624#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17625#[rustc_legacy_const_generics(3)]
17626pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17627    slice: *mut i32,
17628    offsets: __m512i,
17629    src: __m256i,
17630) {
17631    static_assert_imm8_scale!(SCALE);
17632    let src = src.as_i32x8();
17633    let neg_one = -1;
17634    let slice = slice as *mut i8;
17635    let offsets = offsets.as_i64x8();
17636    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17637}
17638
17639/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17640///
17641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17642#[inline]
17643#[target_feature(enable = "avx512f")]
17644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17645#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17646#[rustc_legacy_const_generics(4)]
17647pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17648    slice: *mut i32,
17649    mask: __mmask8,
17650    offsets: __m512i,
17651    src: __m256i,
17652) {
17653    static_assert_imm8_scale!(SCALE);
17654    let src = src.as_i32x8();
17655    let mask = mask as i8;
17656    let slice = slice as *mut i8;
17657    let offsets = offsets.as_i64x8();
17658    vpscatterqd(slice, mask, offsets, src, SCALE);
17659}
17660
17661/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17662/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17663///
17664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17665#[inline]
17666#[target_feature(enable = "avx512f")]
17667#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17668#[rustc_legacy_const_generics(2)]
17669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17670pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17671    vindex: __m512i,
17672    base_addr: *const i64,
17673) -> __m512i {
17674    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17675}
17676
17677/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17678/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17679/// (elements are copied from src when the corresponding mask bit is not set).
17680///
17681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17682#[inline]
17683#[target_feature(enable = "avx512f")]
17684#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17685#[rustc_legacy_const_generics(4)]
17686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17687pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17688    src: __m512i,
17689    k: __mmask8,
17690    vindex: __m512i,
17691    base_addr: *const i64,
17692) -> __m512i {
17693    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17694}
17695
17696/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17697/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17698///
17699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17700#[inline]
17701#[target_feature(enable = "avx512f")]
17702#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17703#[rustc_legacy_const_generics(2)]
17704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17705pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17706    vindex: __m512i,
17707    base_addr: *const f64,
17708) -> __m512d {
17709    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17710}
17711
17712/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17713/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17714/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17715///
17716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17717#[inline]
17718#[target_feature(enable = "avx512f")]
17719#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17720#[rustc_legacy_const_generics(4)]
17721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17722pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17723    src: __m512d,
17724    k: __mmask8,
17725    vindex: __m512i,
17726    base_addr: *const f64,
17727) -> __m512d {
17728    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17729}
17730
17731/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17732/// indices stored in the lower half of vindex scaled by scale.
17733///
17734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17735#[inline]
17736#[target_feature(enable = "avx512f")]
17737#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17738#[rustc_legacy_const_generics(3)]
17739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17740pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17741    base_addr: *mut i64,
17742    vindex: __m512i,
17743    a: __m512i,
17744) {
17745    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17746}
17747
17748/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17749/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17750/// mask bit is not set are not written to memory).
17751///
17752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17753#[inline]
17754#[target_feature(enable = "avx512f")]
17755#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17756#[rustc_legacy_const_generics(4)]
17757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17758pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17759    base_addr: *mut i64,
17760    k: __mmask8,
17761    vindex: __m512i,
17762    a: __m512i,
17763) {
17764    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17765}
17766
17767/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17768/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17769///
17770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17771#[inline]
17772#[target_feature(enable = "avx512f")]
17773#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17774#[rustc_legacy_const_generics(3)]
17775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17776pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17777    base_addr: *mut f64,
17778    vindex: __m512i,
17779    a: __m512d,
17780) {
17781    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17782}
17783
17784/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17785/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17786/// (elements whose corresponding mask bit is not set are not written to memory).
17787///
17788/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17789#[inline]
17790#[target_feature(enable = "avx512f")]
17791#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17792#[rustc_legacy_const_generics(4)]
17793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17794pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17795    base_addr: *mut f64,
17796    k: __mmask8,
17797    vindex: __m512i,
17798    a: __m512d,
17799) {
17800    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17801}
17802
17803/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17804/// indices stored in vindex scaled by scale
17805///
17806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17807#[inline]
17808#[target_feature(enable = "avx512f,avx512vl")]
17809#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17810#[rustc_legacy_const_generics(3)]
17811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17812pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17813    base_addr: *mut i32,
17814    vindex: __m256i,
17815    a: __m256i,
17816) {
17817    static_assert_imm8_scale!(SCALE);
17818    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17819}
17820
17821/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17822/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17823/// are not written to memory).
17824///
17825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17826#[inline]
17827#[target_feature(enable = "avx512f,avx512vl")]
17828#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17829#[rustc_legacy_const_generics(4)]
17830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17831pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17832    base_addr: *mut i32,
17833    k: __mmask8,
17834    vindex: __m256i,
17835    a: __m256i,
17836) {
17837    static_assert_imm8_scale!(SCALE);
17838    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17839}
17840
17841/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17842///
17843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17844#[inline]
17845#[target_feature(enable = "avx512f,avx512vl")]
17846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17847#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17848#[rustc_legacy_const_generics(3)]
17849pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17850    slice: *mut i64,
17851    offsets: __m128i,
17852    src: __m256i,
17853) {
17854    static_assert_imm8_scale!(SCALE);
17855    let src = src.as_i64x4();
17856    let slice = slice as *mut i8;
17857    let offsets = offsets.as_i32x4();
17858    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17859}
17860
17861/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17862/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17863/// are not written to memory).
17864///
17865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17866#[inline]
17867#[target_feature(enable = "avx512f,avx512vl")]
17868#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17869#[rustc_legacy_const_generics(4)]
17870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17871pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17872    base_addr: *mut i64,
17873    k: __mmask8,
17874    vindex: __m128i,
17875    a: __m256i,
17876) {
17877    static_assert_imm8_scale!(SCALE);
17878    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17879}
17880
17881/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17882/// at packed 32-bit integer indices stored in vindex scaled by scale
17883///
17884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17885#[inline]
17886#[target_feature(enable = "avx512f,avx512vl")]
17887#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17888#[rustc_legacy_const_generics(3)]
17889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17890pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17891    base_addr: *mut f64,
17892    vindex: __m128i,
17893    a: __m256d,
17894) {
17895    static_assert_imm8_scale!(SCALE);
17896    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17897}
17898
17899/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17900/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17901/// mask bit is not set are not written to memory).
17902///
17903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17904#[inline]
17905#[target_feature(enable = "avx512f,avx512vl")]
17906#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17907#[rustc_legacy_const_generics(4)]
17908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17909pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17910    base_addr: *mut f64,
17911    k: __mmask8,
17912    vindex: __m128i,
17913    a: __m256d,
17914) {
17915    static_assert_imm8_scale!(SCALE);
17916    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17917}
17918
17919/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17920/// at packed 32-bit integer indices stored in vindex scaled by scale
17921///
17922/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17923#[inline]
17924#[target_feature(enable = "avx512f,avx512vl")]
17925#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17926#[rustc_legacy_const_generics(3)]
17927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17928pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17929    base_addr: *mut f32,
17930    vindex: __m256i,
17931    a: __m256,
17932) {
17933    static_assert_imm8_scale!(SCALE);
17934    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17935}
17936
17937/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17938/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17939/// mask bit is not set are not written to memory).
17940///
17941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17942#[inline]
17943#[target_feature(enable = "avx512f,avx512vl")]
17944#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17945#[rustc_legacy_const_generics(4)]
17946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17947pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17948    base_addr: *mut f32,
17949    k: __mmask8,
17950    vindex: __m256i,
17951    a: __m256,
17952) {
17953    static_assert_imm8_scale!(SCALE);
17954    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17955}
17956
17957/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17958/// indices stored in vindex scaled by scale
17959///
17960/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17961#[inline]
17962#[target_feature(enable = "avx512f,avx512vl")]
17963#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17964#[rustc_legacy_const_generics(3)]
17965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17966pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17967    base_addr: *mut i32,
17968    vindex: __m256i,
17969    a: __m128i,
17970) {
17971    static_assert_imm8_scale!(SCALE);
17972    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17973}
17974
17975/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17976/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17977/// are not written to memory).
17978///
17979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17980#[inline]
17981#[target_feature(enable = "avx512f,avx512vl")]
17982#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17983#[rustc_legacy_const_generics(4)]
17984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17985pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17986    base_addr: *mut i32,
17987    k: __mmask8,
17988    vindex: __m256i,
17989    a: __m128i,
17990) {
17991    static_assert_imm8_scale!(SCALE);
17992    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17993}
17994
17995/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17996/// indices stored in vindex scaled by scale
17997///
17998/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17999#[inline]
18000#[target_feature(enable = "avx512f,avx512vl")]
18001#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18002#[rustc_legacy_const_generics(3)]
18003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18004pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
18005    base_addr: *mut i64,
18006    vindex: __m256i,
18007    a: __m256i,
18008) {
18009    static_assert_imm8_scale!(SCALE);
18010    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
18011}
18012
18013/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18014/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18015/// are not written to memory).
18016///
18017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
18018#[inline]
18019#[target_feature(enable = "avx512f,avx512vl")]
18020#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18021#[rustc_legacy_const_generics(4)]
18022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18023pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
18024    base_addr: *mut i64,
18025    k: __mmask8,
18026    vindex: __m256i,
18027    a: __m256i,
18028) {
18029    static_assert_imm8_scale!(SCALE);
18030    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
18031}
18032
18033/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18034/// at packed 64-bit integer indices stored in vindex scaled by scale
18035///
18036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
18037#[inline]
18038#[target_feature(enable = "avx512f,avx512vl")]
18039#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18040#[rustc_legacy_const_generics(3)]
18041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18042pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
18043    base_addr: *mut f64,
18044    vindex: __m256i,
18045    a: __m256d,
18046) {
18047    static_assert_imm8_scale!(SCALE);
18048    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
18049}
18050
18051/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18052/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18053/// mask bit is not set are not written to memory).
18054///
18055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
18056#[inline]
18057#[target_feature(enable = "avx512f,avx512vl")]
18058#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18059#[rustc_legacy_const_generics(4)]
18060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18061pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
18062    base_addr: *mut f64,
18063    k: __mmask8,
18064    vindex: __m256i,
18065    a: __m256d,
18066) {
18067    static_assert_imm8_scale!(SCALE);
18068    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
18069}
18070
18071/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18072/// at packed 64-bit integer indices stored in vindex scaled by scale
18073///
18074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
18075#[inline]
18076#[target_feature(enable = "avx512f,avx512vl")]
18077#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18078#[rustc_legacy_const_generics(3)]
18079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18080pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
18081    base_addr: *mut f32,
18082    vindex: __m256i,
18083    a: __m128,
18084) {
18085    static_assert_imm8_scale!(SCALE);
18086    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
18087}
18088
18089/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18090/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18091/// mask bit is not set are not written to memory).
18092///
18093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
18094#[inline]
18095#[target_feature(enable = "avx512f,avx512vl")]
18096#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18097#[rustc_legacy_const_generics(4)]
18098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18099pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
18100    base_addr: *mut f32,
18101    k: __mmask8,
18102    vindex: __m256i,
18103    a: __m128,
18104) {
18105    static_assert_imm8_scale!(SCALE);
18106    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
18107}
18108
18109/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18110/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18111/// mask bit is not set).
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18117#[rustc_legacy_const_generics(4)]
18118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18119pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
18120    src: __m256i,
18121    k: __mmask8,
18122    vindex: __m256i,
18123    base_addr: *const i32,
18124) -> __m256i {
18125    static_assert_imm8_scale!(SCALE);
18126    transmute(vpgatherdd_256(
18127        src.as_i32x8(),
18128        base_addr as _,
18129        vindex.as_i32x8(),
18130        k,
18131        SCALE,
18132    ))
18133}
18134
18135/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18136/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18137/// mask bit is not set).
18138///
18139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
18140#[inline]
18141#[target_feature(enable = "avx512f,avx512vl")]
18142#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18143#[rustc_legacy_const_generics(4)]
18144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18145pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
18146    src: __m256i,
18147    k: __mmask8,
18148    vindex: __m128i,
18149    base_addr: *const i64,
18150) -> __m256i {
18151    static_assert_imm8_scale!(SCALE);
18152    transmute(vpgatherdq_256(
18153        src.as_i64x4(),
18154        base_addr as _,
18155        vindex.as_i32x4(),
18156        k,
18157        SCALE,
18158    ))
18159}
18160
18161/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18162/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18163/// from src when the corresponding mask bit is not set).
18164///
18165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
18166#[inline]
18167#[target_feature(enable = "avx512f,avx512vl")]
18168#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18169#[rustc_legacy_const_generics(4)]
18170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18171pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
18172    src: __m256d,
18173    k: __mmask8,
18174    vindex: __m128i,
18175    base_addr: *const f64,
18176) -> __m256d {
18177    static_assert_imm8_scale!(SCALE);
18178    transmute(vgatherdpd_256(
18179        src.as_f64x4(),
18180        base_addr as _,
18181        vindex.as_i32x4(),
18182        k,
18183        SCALE,
18184    ))
18185}
18186
18187/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18188/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18189/// from src when the corresponding mask bit is not set).
18190///
18191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
18192#[inline]
18193#[target_feature(enable = "avx512f,avx512vl")]
18194#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18195#[rustc_legacy_const_generics(4)]
18196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18197pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
18198    src: __m256,
18199    k: __mmask8,
18200    vindex: __m256i,
18201    base_addr: *const f32,
18202) -> __m256 {
18203    static_assert_imm8_scale!(SCALE);
18204    transmute(vgatherdps_256(
18205        src.as_f32x8(),
18206        base_addr as _,
18207        vindex.as_i32x8(),
18208        k,
18209        SCALE,
18210    ))
18211}
18212
18213/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18214/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18215/// mask bit is not set).
18216///
18217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
18218#[inline]
18219#[target_feature(enable = "avx512f,avx512vl")]
18220#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18221#[rustc_legacy_const_generics(4)]
18222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18223pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
18224    src: __m128i,
18225    k: __mmask8,
18226    vindex: __m256i,
18227    base_addr: *const i32,
18228) -> __m128i {
18229    static_assert_imm8_scale!(SCALE);
18230    transmute(vpgatherqd_256(
18231        src.as_i32x4(),
18232        base_addr as _,
18233        vindex.as_i64x4(),
18234        k,
18235        SCALE,
18236    ))
18237}
18238
18239/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18240/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18241/// mask bit is not set).
18242///
18243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
18244#[inline]
18245#[target_feature(enable = "avx512f,avx512vl")]
18246#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18247#[rustc_legacy_const_generics(4)]
18248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18249pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
18250    src: __m256i,
18251    k: __mmask8,
18252    vindex: __m256i,
18253    base_addr: *const i64,
18254) -> __m256i {
18255    static_assert_imm8_scale!(SCALE);
18256    transmute(vpgatherqq_256(
18257        src.as_i64x4(),
18258        base_addr as _,
18259        vindex.as_i64x4(),
18260        k,
18261        SCALE,
18262    ))
18263}
18264
18265/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18266/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18267/// from src when the corresponding mask bit is not set).
18268///
18269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
18270#[inline]
18271#[target_feature(enable = "avx512f,avx512vl")]
18272#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18273#[rustc_legacy_const_generics(4)]
18274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18275pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
18276    src: __m256d,
18277    k: __mmask8,
18278    vindex: __m256i,
18279    base_addr: *const f64,
18280) -> __m256d {
18281    static_assert_imm8_scale!(SCALE);
18282    transmute(vgatherqpd_256(
18283        src.as_f64x4(),
18284        base_addr as _,
18285        vindex.as_i64x4(),
18286        k,
18287        SCALE,
18288    ))
18289}
18290
18291/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18292/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18293/// from src when the corresponding mask bit is not set).
18294///
18295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
18296#[inline]
18297#[target_feature(enable = "avx512f,avx512vl")]
18298#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18299#[rustc_legacy_const_generics(4)]
18300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18301pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
18302    src: __m128,
18303    k: __mmask8,
18304    vindex: __m256i,
18305    base_addr: *const f32,
18306) -> __m128 {
18307    static_assert_imm8_scale!(SCALE);
18308    transmute(vgatherqps_256(
18309        src.as_f32x4(),
18310        base_addr as _,
18311        vindex.as_i64x4(),
18312        k,
18313        SCALE,
18314    ))
18315}
18316
18317/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18318/// indices stored in vindex scaled by scale
18319///
18320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
18321#[inline]
18322#[target_feature(enable = "avx512f,avx512vl")]
18323#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18324#[rustc_legacy_const_generics(3)]
18325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18326pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
18327    base_addr: *mut i32,
18328    vindex: __m128i,
18329    a: __m128i,
18330) {
18331    static_assert_imm8_scale!(SCALE);
18332    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
18333}
18334
18335/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18336/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18337/// are not written to memory).
18338///
18339/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
18340#[inline]
18341#[target_feature(enable = "avx512f,avx512vl")]
18342#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18343#[rustc_legacy_const_generics(4)]
18344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18345pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
18346    base_addr: *mut i32,
18347    k: __mmask8,
18348    vindex: __m128i,
18349    a: __m128i,
18350) {
18351    static_assert_imm8_scale!(SCALE);
18352    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
18353}
18354
18355/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18356/// indices stored in vindex scaled by scale
18357///
18358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
18359#[inline]
18360#[target_feature(enable = "avx512f,avx512vl")]
18361#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18362#[rustc_legacy_const_generics(3)]
18363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18364pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
18365    base_addr: *mut i64,
18366    vindex: __m128i,
18367    a: __m128i,
18368) {
18369    static_assert_imm8_scale!(SCALE);
18370    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
18371}
18372
18373/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18374/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18375/// are not written to memory).
18376///
18377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
18378#[inline]
18379#[target_feature(enable = "avx512f,avx512vl")]
18380#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18381#[rustc_legacy_const_generics(4)]
18382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18383pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
18384    base_addr: *mut i64,
18385    k: __mmask8,
18386    vindex: __m128i,
18387    a: __m128i,
18388) {
18389    static_assert_imm8_scale!(SCALE);
18390    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
18391}
18392
18393/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18394/// at packed 32-bit integer indices stored in vindex scaled by scale
18395///
18396/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
18397#[inline]
18398#[target_feature(enable = "avx512f,avx512vl")]
18399#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18400#[rustc_legacy_const_generics(3)]
18401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18402pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
18403    base_addr: *mut f64,
18404    vindex: __m128i,
18405    a: __m128d,
18406) {
18407    static_assert_imm8_scale!(SCALE);
18408    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18409}
18410
18411/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18412/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18413/// mask bit is not set are not written to memory).
18414///
18415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18416#[inline]
18417#[target_feature(enable = "avx512f,avx512vl")]
18418#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18419#[rustc_legacy_const_generics(4)]
18420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18421pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18422    base_addr: *mut f64,
18423    k: __mmask8,
18424    vindex: __m128i,
18425    a: __m128d,
18426) {
18427    static_assert_imm8_scale!(SCALE);
18428    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18429}
18430
18431/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18432/// at packed 32-bit integer indices stored in vindex scaled by scale
18433///
18434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18435#[inline]
18436#[target_feature(enable = "avx512f,avx512vl")]
18437#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18438#[rustc_legacy_const_generics(3)]
18439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18440pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18441    static_assert_imm8_scale!(SCALE);
18442    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18443}
18444
18445/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18446/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18447/// mask bit is not set are not written to memory).
18448///
18449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18453#[rustc_legacy_const_generics(4)]
18454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18455pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18456    base_addr: *mut f32,
18457    k: __mmask8,
18458    vindex: __m128i,
18459    a: __m128,
18460) {
18461    static_assert_imm8_scale!(SCALE);
18462    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18463}
18464
18465/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18466/// indices stored in vindex scaled by scale
18467///
18468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18472#[rustc_legacy_const_generics(3)]
18473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18474pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18475    base_addr: *mut i32,
18476    vindex: __m128i,
18477    a: __m128i,
18478) {
18479    static_assert_imm8_scale!(SCALE);
18480    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18481}
18482
18483/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18484/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18485/// are not written to memory).
18486///
18487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18488#[inline]
18489#[target_feature(enable = "avx512f,avx512vl")]
18490#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18491#[rustc_legacy_const_generics(4)]
18492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18493pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18494    base_addr: *mut i32,
18495    k: __mmask8,
18496    vindex: __m128i,
18497    a: __m128i,
18498) {
18499    static_assert_imm8_scale!(SCALE);
18500    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18501}
18502
18503/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18504/// indices stored in vindex scaled by scale
18505///
18506/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18507#[inline]
18508#[target_feature(enable = "avx512f,avx512vl")]
18509#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18510#[rustc_legacy_const_generics(3)]
18511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18512pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18513    base_addr: *mut i64,
18514    vindex: __m128i,
18515    a: __m128i,
18516) {
18517    static_assert_imm8_scale!(SCALE);
18518    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18519}
18520
18521/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18522/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18523/// are not written to memory).
18524///
18525/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18526#[inline]
18527#[target_feature(enable = "avx512f,avx512vl")]
18528#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18529#[rustc_legacy_const_generics(4)]
18530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18531pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18532    base_addr: *mut i64,
18533    k: __mmask8,
18534    vindex: __m128i,
18535    a: __m128i,
18536) {
18537    static_assert_imm8_scale!(SCALE);
18538    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18539}
18540
18541/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18542/// at packed 64-bit integer indices stored in vindex scaled by scale
18543///
18544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18545#[inline]
18546#[target_feature(enable = "avx512f,avx512vl")]
18547#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18548#[rustc_legacy_const_generics(3)]
18549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18550pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18551    base_addr: *mut f64,
18552    vindex: __m128i,
18553    a: __m128d,
18554) {
18555    static_assert_imm8_scale!(SCALE);
18556    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18557}
18558
/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    // Only lanes whose bit in `k` is set are written to memory.
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}
18578
/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    // All-ones mask: every element is stored. Only 2 lanes participate (2 qword indices),
    // so only the low floats of `a` are used; the extra mask bits are irrelevant.
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}
18592
/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    // Only lanes whose bit in `k` is set are written to memory.
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}
18611
/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i32,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    // Lanes with a clear mask bit are not loaded; they are taken from `src` instead.
    transmute(vpgatherdd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}
18637
/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i64,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    // Only the low 2 dword indices of `vindex` are consumed (2 qword destination lanes).
    transmute(vpgatherdq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}
18663
/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f64,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    // Lanes with a clear mask bit are not loaded; they are taken from `src` instead.
    transmute(vgatherdpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}
18689
/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f32,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    // Lanes with a clear mask bit are not loaded; they are taken from `src` instead.
    transmute(vgatherdps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}
18715
/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i32,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    // 2 qword indices drive 2 dword loads into the low half of the result.
    transmute(vpgatherqd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}
18741
/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i64,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    // Lanes with a clear mask bit are not loaded; they are taken from `src` instead.
    transmute(vpgatherqq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}
18767
/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f64,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    // Lanes with a clear mask bit are not loaded; they are taken from `src` instead.
    transmute(vgatherqpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}
18793
/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f32,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    // 2 qword indices drive 2 float loads into the low half of the result.
    transmute(vgatherqps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}
18819
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
}
18830
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
}
18841
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
}
18852
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
}
18863
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
}
18874
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
}
18885
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
}
18896
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
}
18907
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
}
18918
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
}
18929
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
}
18940
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
}
18951
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
}
18962
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
}
18973
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
}
18984
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
}
18995
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
}
19006
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
}
19017
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
}
19028
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
}
19039
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
}
19050
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
}
19061
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    // Active elements are packed down to the low lanes; `src` fills the rest.
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
}
19072
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    // Same as the mask form but with an all-zero pass-through vector.
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
}
19083
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
    // Exactly k.count_ones() elements are written contiguously; caller must
    // ensure base_addr is valid for that many writes.
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
}
19094
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
}
19105
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
}
19116
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
}
19127
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
}
19138
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
}
19149
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
}
19160
/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
    // Exactly k.count_ones() elements are written contiguously starting at base_addr.
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
}
19171
19172/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19173///
19174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
19175#[inline]
19176#[target_feature(enable = "avx512f,avx512vl")]
19177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19178#[cfg_attr(test, assert_instr(vcompressps))]
19179pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
19180    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
19181}
19182
19183/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19184///
19185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
19186#[inline]
19187#[target_feature(enable = "avx512f")]
19188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19189#[cfg_attr(test, assert_instr(vcompresspd))]
19190pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
19191    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
19192}
19193
19194/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19195///
19196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
19197#[inline]
19198#[target_feature(enable = "avx512f,avx512vl")]
19199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19200#[cfg_attr(test, assert_instr(vcompresspd))]
19201pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
19202    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
19203}
19204
19205/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19206///
19207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
19208#[inline]
19209#[target_feature(enable = "avx512f,avx512vl")]
19210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19211#[cfg_attr(test, assert_instr(vcompresspd))]
19212pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
19213    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
19214}
19215
19216/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19217///
19218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
19219#[inline]
19220#[target_feature(enable = "avx512f")]
19221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19222#[cfg_attr(test, assert_instr(vpexpandd))]
19223pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19224    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
19225}
19226
19227/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19228///
19229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
19230#[inline]
19231#[target_feature(enable = "avx512f")]
19232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19233#[cfg_attr(test, assert_instr(vpexpandd))]
19234pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
19235    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
19236}
19237
19238/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19239///
19240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
19241#[inline]
19242#[target_feature(enable = "avx512f,avx512vl")]
19243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19244#[cfg_attr(test, assert_instr(vpexpandd))]
19245pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19246    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
19247}
19248
19249/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19250///
19251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
19252#[inline]
19253#[target_feature(enable = "avx512f,avx512vl")]
19254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19255#[cfg_attr(test, assert_instr(vpexpandd))]
19256pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
19257    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
19258}
19259
19260/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19261///
19262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
19263#[inline]
19264#[target_feature(enable = "avx512f,avx512vl")]
19265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19266#[cfg_attr(test, assert_instr(vpexpandd))]
19267pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19268    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
19269}
19270
19271/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19272///
19273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
19274#[inline]
19275#[target_feature(enable = "avx512f,avx512vl")]
19276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19277#[cfg_attr(test, assert_instr(vpexpandd))]
19278pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
19279    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
19280}
19281
19282/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19283///
19284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
19285#[inline]
19286#[target_feature(enable = "avx512f")]
19287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19288#[cfg_attr(test, assert_instr(vpexpandq))]
19289pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19290    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
19291}
19292
19293/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19294///
19295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
19296#[inline]
19297#[target_feature(enable = "avx512f")]
19298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19299#[cfg_attr(test, assert_instr(vpexpandq))]
19300pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
19301    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
19302}
19303
19304/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19305///
19306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
19307#[inline]
19308#[target_feature(enable = "avx512f,avx512vl")]
19309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19310#[cfg_attr(test, assert_instr(vpexpandq))]
19311pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19312    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
19313}
19314
19315/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19316///
19317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
19318#[inline]
19319#[target_feature(enable = "avx512f,avx512vl")]
19320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19321#[cfg_attr(test, assert_instr(vpexpandq))]
19322pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
19323    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
19324}
19325
19326/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vpexpandq))]
19333pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19334    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
19335}
19336
19337/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
19340#[inline]
19341#[target_feature(enable = "avx512f,avx512vl")]
19342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19343#[cfg_attr(test, assert_instr(vpexpandq))]
19344pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
19345    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
19346}
19347
19348/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19349///
19350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
19351#[inline]
19352#[target_feature(enable = "avx512f")]
19353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19354#[cfg_attr(test, assert_instr(vexpandps))]
19355pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
19356    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
19357}
19358
19359/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19360///
19361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
19362#[inline]
19363#[target_feature(enable = "avx512f")]
19364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19365#[cfg_attr(test, assert_instr(vexpandps))]
19366pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
19367    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
19368}
19369
19370/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19371///
19372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
19373#[inline]
19374#[target_feature(enable = "avx512f,avx512vl")]
19375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19376#[cfg_attr(test, assert_instr(vexpandps))]
19377pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
19378    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
19379}
19380
19381/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19382///
19383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
19384#[inline]
19385#[target_feature(enable = "avx512f,avx512vl")]
19386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19387#[cfg_attr(test, assert_instr(vexpandps))]
19388pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
19389    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
19390}
19391
19392/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19393///
19394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
19395#[inline]
19396#[target_feature(enable = "avx512f,avx512vl")]
19397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19398#[cfg_attr(test, assert_instr(vexpandps))]
19399pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19400    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
19401}
19402
19403/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19404///
19405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
19406#[inline]
19407#[target_feature(enable = "avx512f,avx512vl")]
19408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19409#[cfg_attr(test, assert_instr(vexpandps))]
19410pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
19411    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
19412}
19413
19414/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19415///
19416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19417#[inline]
19418#[target_feature(enable = "avx512f")]
19419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19420#[cfg_attr(test, assert_instr(vexpandpd))]
19421pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19422    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
19423}
19424
19425/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19426///
19427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19428#[inline]
19429#[target_feature(enable = "avx512f")]
19430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19431#[cfg_attr(test, assert_instr(vexpandpd))]
19432pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
19433    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
19434}
19435
19436/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19437///
19438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19439#[inline]
19440#[target_feature(enable = "avx512f,avx512vl")]
19441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19442#[cfg_attr(test, assert_instr(vexpandpd))]
19443pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19444    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
19445}
19446
19447/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19448///
19449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19450#[inline]
19451#[target_feature(enable = "avx512f,avx512vl")]
19452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19453#[cfg_attr(test, assert_instr(vexpandpd))]
19454pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
19455    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
19456}
19457
19458/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19459///
19460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19461#[inline]
19462#[target_feature(enable = "avx512f,avx512vl")]
19463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19464#[cfg_attr(test, assert_instr(vexpandpd))]
19465pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19466    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
19467}
19468
19469/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19470///
19471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19472#[inline]
19473#[target_feature(enable = "avx512f,avx512vl")]
19474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19475#[cfg_attr(test, assert_instr(vexpandpd))]
19476pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19477    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19478}
19479
19480/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19481///
19482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19483#[inline]
19484#[target_feature(enable = "avx512f")]
19485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19486#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19487#[rustc_legacy_const_generics(1)]
19488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19489pub const fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19490    static_assert_uimm_bits!(IMM8, 8);
19491    _mm512_rolv_epi32(a, _mm512_set1_epi32(IMM8))
19492}
19493
19494/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19495///
19496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19497#[inline]
19498#[target_feature(enable = "avx512f")]
19499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19500#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19501#[rustc_legacy_const_generics(3)]
19502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19503pub const fn _mm512_mask_rol_epi32<const IMM8: i32>(
19504    src: __m512i,
19505    k: __mmask16,
19506    a: __m512i,
19507) -> __m512i {
19508    static_assert_uimm_bits!(IMM8, 8);
19509    _mm512_mask_rolv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
19510}
19511
19512/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19513///
19514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19515#[inline]
19516#[target_feature(enable = "avx512f")]
19517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19518#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19519#[rustc_legacy_const_generics(2)]
19520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19521pub const fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19522    static_assert_uimm_bits!(IMM8, 8);
19523    _mm512_maskz_rolv_epi32(k, a, _mm512_set1_epi32(IMM8))
19524}
19525
19526/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19527///
19528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19529#[inline]
19530#[target_feature(enable = "avx512f,avx512vl")]
19531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19532#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19533#[rustc_legacy_const_generics(1)]
19534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19535pub const fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19536    static_assert_uimm_bits!(IMM8, 8);
19537    _mm256_rolv_epi32(a, _mm256_set1_epi32(IMM8))
19538}
19539
19540/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19541///
19542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19543#[inline]
19544#[target_feature(enable = "avx512f,avx512vl")]
19545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19546#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19547#[rustc_legacy_const_generics(3)]
19548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19549pub const fn _mm256_mask_rol_epi32<const IMM8: i32>(
19550    src: __m256i,
19551    k: __mmask8,
19552    a: __m256i,
19553) -> __m256i {
19554    static_assert_uimm_bits!(IMM8, 8);
19555    _mm256_mask_rolv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
19556}
19557
19558/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19564#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19565#[rustc_legacy_const_generics(2)]
19566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19567pub const fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19568    static_assert_uimm_bits!(IMM8, 8);
19569    _mm256_maskz_rolv_epi32(k, a, _mm256_set1_epi32(IMM8))
19570}
19571
19572/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19573///
19574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19575#[inline]
19576#[target_feature(enable = "avx512f,avx512vl")]
19577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19578#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19579#[rustc_legacy_const_generics(1)]
19580#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19581pub const fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19582    static_assert_uimm_bits!(IMM8, 8);
19583    _mm_rolv_epi32(a, _mm_set1_epi32(IMM8))
19584}
19585
19586/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19587///
19588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19589#[inline]
19590#[target_feature(enable = "avx512f,avx512vl")]
19591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19592#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19593#[rustc_legacy_const_generics(3)]
19594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19595pub const fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19596    static_assert_uimm_bits!(IMM8, 8);
19597    _mm_mask_rolv_epi32(src, k, a, _mm_set1_epi32(IMM8))
19598}
19599
19600/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19601///
19602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19603#[inline]
19604#[target_feature(enable = "avx512f,avx512vl")]
19605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19606#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19607#[rustc_legacy_const_generics(2)]
19608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19609pub const fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19610    static_assert_uimm_bits!(IMM8, 8);
19611    _mm_maskz_rolv_epi32(k, a, _mm_set1_epi32(IMM8))
19612}
19613
19614/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19615///
19616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19617#[inline]
19618#[target_feature(enable = "avx512f")]
19619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19620#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19621#[rustc_legacy_const_generics(1)]
19622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19623pub const fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19624    static_assert_uimm_bits!(IMM8, 8);
19625    _mm512_rorv_epi32(a, _mm512_set1_epi32(IMM8))
19626}
19627
19628/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19634#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19635#[rustc_legacy_const_generics(3)]
19636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19637pub const fn _mm512_mask_ror_epi32<const IMM8: i32>(
19638    src: __m512i,
19639    k: __mmask16,
19640    a: __m512i,
19641) -> __m512i {
19642    static_assert_uimm_bits!(IMM8, 8);
19643    _mm512_mask_rorv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
19644}
19645
19646/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19647///
19648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19649#[inline]
19650#[target_feature(enable = "avx512f")]
19651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19652#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19653#[rustc_legacy_const_generics(2)]
19654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19655pub const fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19656    static_assert_uimm_bits!(IMM8, 8);
19657    _mm512_maskz_rorv_epi32(k, a, _mm512_set1_epi32(IMM8))
19658}
19659
19660/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19661///
19662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19663#[inline]
19664#[target_feature(enable = "avx512f,avx512vl")]
19665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19666#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19667#[rustc_legacy_const_generics(1)]
19668#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19669pub const fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19670    static_assert_uimm_bits!(IMM8, 8);
19671    _mm256_rorv_epi32(a, _mm256_set1_epi32(IMM8))
19672}
19673
19674/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19675///
19676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19677#[inline]
19678#[target_feature(enable = "avx512f,avx512vl")]
19679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19680#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19681#[rustc_legacy_const_generics(3)]
19682#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19683pub const fn _mm256_mask_ror_epi32<const IMM8: i32>(
19684    src: __m256i,
19685    k: __mmask8,
19686    a: __m256i,
19687) -> __m256i {
19688    static_assert_uimm_bits!(IMM8, 8);
19689    _mm256_mask_rorv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
19690}
19691
19692/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19693///
19694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19695#[inline]
19696#[target_feature(enable = "avx512f,avx512vl")]
19697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19698#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19699#[rustc_legacy_const_generics(2)]
19700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19701pub const fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19702    static_assert_uimm_bits!(IMM8, 8);
19703    _mm256_maskz_rorv_epi32(k, a, _mm256_set1_epi32(IMM8))
19704}
19705
19706/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19707///
19708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19709#[inline]
19710#[target_feature(enable = "avx512f,avx512vl")]
19711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19712#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19713#[rustc_legacy_const_generics(1)]
19714#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19715pub const fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19716    static_assert_uimm_bits!(IMM8, 8);
19717    _mm_rorv_epi32(a, _mm_set1_epi32(IMM8))
19718}
19719
19720/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19721///
19722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19723#[inline]
19724#[target_feature(enable = "avx512f,avx512vl")]
19725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19726#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19727#[rustc_legacy_const_generics(3)]
19728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19729pub const fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19730    static_assert_uimm_bits!(IMM8, 8);
19731    _mm_mask_rorv_epi32(src, k, a, _mm_set1_epi32(IMM8))
19732}
19733
19734/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19735///
19736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19737#[inline]
19738#[target_feature(enable = "avx512f,avx512vl")]
19739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19740#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19741#[rustc_legacy_const_generics(2)]
19742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19743pub const fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19744    static_assert_uimm_bits!(IMM8, 8);
19745    _mm_maskz_rorv_epi32(k, a, _mm_set1_epi32(IMM8))
19746}
19747
19748/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19754#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19757pub const fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19758    static_assert_uimm_bits!(IMM8, 8);
19759    _mm512_rolv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
19760}
19761
19762/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19765#[inline]
19766#[target_feature(enable = "avx512f")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19769#[rustc_legacy_const_generics(3)]
19770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19771pub const fn _mm512_mask_rol_epi64<const IMM8: i32>(
19772    src: __m512i,
19773    k: __mmask8,
19774    a: __m512i,
19775) -> __m512i {
19776    static_assert_uimm_bits!(IMM8, 8);
19777    _mm512_mask_rolv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
19778}
19779
19780/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19781///
19782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19783#[inline]
19784#[target_feature(enable = "avx512f")]
19785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19786#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19787#[rustc_legacy_const_generics(2)]
19788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19789pub const fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19790    static_assert_uimm_bits!(IMM8, 8);
19791    _mm512_maskz_rolv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
19792}
19793
19794/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19795///
19796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19797#[inline]
19798#[target_feature(enable = "avx512f,avx512vl")]
19799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19800#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19801#[rustc_legacy_const_generics(1)]
19802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19803pub const fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19804    static_assert_uimm_bits!(IMM8, 8);
19805    _mm256_rolv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
19806}
19807
19808/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19809///
19810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19811#[inline]
19812#[target_feature(enable = "avx512f,avx512vl")]
19813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19814#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19815#[rustc_legacy_const_generics(3)]
19816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19817pub const fn _mm256_mask_rol_epi64<const IMM8: i32>(
19818    src: __m256i,
19819    k: __mmask8,
19820    a: __m256i,
19821) -> __m256i {
19822    static_assert_uimm_bits!(IMM8, 8);
19823    _mm256_mask_rolv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
19824}
19825
19826/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19827///
19828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19829#[inline]
19830#[target_feature(enable = "avx512f,avx512vl")]
19831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19832#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19833#[rustc_legacy_const_generics(2)]
19834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19835pub const fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19836    static_assert_uimm_bits!(IMM8, 8);
19837    _mm256_maskz_rolv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
19838}
19839
19840/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19841///
19842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19843#[inline]
19844#[target_feature(enable = "avx512f,avx512vl")]
19845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19846#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19847#[rustc_legacy_const_generics(1)]
19848#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19849pub const fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19850    static_assert_uimm_bits!(IMM8, 8);
19851    _mm_rolv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
19852}
19853
19854/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19855///
19856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19857#[inline]
19858#[target_feature(enable = "avx512f,avx512vl")]
19859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19860#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19861#[rustc_legacy_const_generics(3)]
19862#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19863pub const fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19864    static_assert_uimm_bits!(IMM8, 8);
19865    _mm_mask_rolv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
19866}
19867
19868/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19869///
19870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19871#[inline]
19872#[target_feature(enable = "avx512f,avx512vl")]
19873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19874#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19875#[rustc_legacy_const_generics(2)]
19876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19877pub const fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19878    static_assert_uimm_bits!(IMM8, 8);
19879    _mm_maskz_rolv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
19880}
19881
19882/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19883///
19884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19885#[inline]
19886#[target_feature(enable = "avx512f")]
19887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19888#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19889#[rustc_legacy_const_generics(1)]
19890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19891pub const fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19892    static_assert_uimm_bits!(IMM8, 8);
19893    _mm512_rorv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
19894}
19895
19896/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19897///
19898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19899#[inline]
19900#[target_feature(enable = "avx512f")]
19901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19902#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19903#[rustc_legacy_const_generics(3)]
19904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19905pub const fn _mm512_mask_ror_epi64<const IMM8: i32>(
19906    src: __m512i,
19907    k: __mmask8,
19908    a: __m512i,
19909) -> __m512i {
19910    static_assert_uimm_bits!(IMM8, 8);
19911    _mm512_mask_rorv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
19912}
19913
19914/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19915///
19916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19917#[inline]
19918#[target_feature(enable = "avx512f")]
19919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19920#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19921#[rustc_legacy_const_generics(2)]
19922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19923pub const fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19924    static_assert_uimm_bits!(IMM8, 8);
19925    _mm512_maskz_rorv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
19926}
19927
19928/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19929///
19930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19931#[inline]
19932#[target_feature(enable = "avx512f,avx512vl")]
19933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19934#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19935#[rustc_legacy_const_generics(1)]
19936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19937pub const fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19938    static_assert_uimm_bits!(IMM8, 8);
19939    _mm256_rorv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
19940}
19941
19942/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19943///
19944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19945#[inline]
19946#[target_feature(enable = "avx512f,avx512vl")]
19947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19948#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19949#[rustc_legacy_const_generics(3)]
19950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19951pub const fn _mm256_mask_ror_epi64<const IMM8: i32>(
19952    src: __m256i,
19953    k: __mmask8,
19954    a: __m256i,
19955) -> __m256i {
19956    static_assert_uimm_bits!(IMM8, 8);
19957    _mm256_mask_rorv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
19958}
19959
19960/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19961///
19962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19963#[inline]
19964#[target_feature(enable = "avx512f,avx512vl")]
19965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19966#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19967#[rustc_legacy_const_generics(2)]
19968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19969pub const fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19970    static_assert_uimm_bits!(IMM8, 8);
19971    _mm256_maskz_rorv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
19972}
19973
19974/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19975///
19976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19977#[inline]
19978#[target_feature(enable = "avx512f,avx512vl")]
19979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19980#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19981#[rustc_legacy_const_generics(1)]
19982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19983pub const fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19984    static_assert_uimm_bits!(IMM8, 8);
19985    _mm_rorv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
19986}
19987
19988/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19989///
19990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19991#[inline]
19992#[target_feature(enable = "avx512f,avx512vl")]
19993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19994#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19995#[rustc_legacy_const_generics(3)]
19996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19997pub const fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19998    static_assert_uimm_bits!(IMM8, 8);
19999    _mm_mask_rorv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
20000}
20001
20002/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20003///
20004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
20005#[inline]
20006#[target_feature(enable = "avx512f,avx512vl")]
20007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20008#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
20009#[rustc_legacy_const_generics(2)]
20010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20011pub const fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
20012    static_assert_uimm_bits!(IMM8, 8);
20013    _mm_maskz_rorv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
20014}
20015
20016/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20017///
20018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
20019#[inline]
20020#[target_feature(enable = "avx512f")]
20021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20022#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20023#[rustc_legacy_const_generics(1)]
20024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20025pub const fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20026    unsafe {
20027        static_assert_uimm_bits!(IMM8, 8);
20028        if IMM8 >= 32 {
20029            _mm512_setzero_si512()
20030        } else {
20031            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
20032        }
20033    }
20034}
20035
20036/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20037///
20038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
20039#[inline]
20040#[target_feature(enable = "avx512f")]
20041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20042#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20043#[rustc_legacy_const_generics(3)]
20044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20045pub const fn _mm512_mask_slli_epi32<const IMM8: u32>(
20046    src: __m512i,
20047    k: __mmask16,
20048    a: __m512i,
20049) -> __m512i {
20050    unsafe {
20051        static_assert_uimm_bits!(IMM8, 8);
20052        let shf = if IMM8 >= 32 {
20053            u32x16::ZERO
20054        } else {
20055            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
20056        };
20057        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
20058    }
20059}
20060
20061/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20062///
20063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
20064#[inline]
20065#[target_feature(enable = "avx512f")]
20066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20067#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20068#[rustc_legacy_const_generics(2)]
20069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20070pub const fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20071    unsafe {
20072        static_assert_uimm_bits!(IMM8, 8);
20073        if IMM8 >= 32 {
20074            _mm512_setzero_si512()
20075        } else {
20076            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
20077            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
20078        }
20079    }
20080}
20081
20082/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20083///
20084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
20085#[inline]
20086#[target_feature(enable = "avx512f,avx512vl")]
20087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20088#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20089#[rustc_legacy_const_generics(3)]
20090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20091pub const fn _mm256_mask_slli_epi32<const IMM8: u32>(
20092    src: __m256i,
20093    k: __mmask8,
20094    a: __m256i,
20095) -> __m256i {
20096    unsafe {
20097        static_assert_uimm_bits!(IMM8, 8);
20098        let r = if IMM8 >= 32 {
20099            u32x8::ZERO
20100        } else {
20101            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
20102        };
20103        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
20104    }
20105}
20106
20107/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20108///
20109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
20110#[inline]
20111#[target_feature(enable = "avx512f,avx512vl")]
20112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20113#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20114#[rustc_legacy_const_generics(2)]
20115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20116pub const fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20117    unsafe {
20118        static_assert_uimm_bits!(IMM8, 8);
20119        if IMM8 >= 32 {
20120            _mm256_setzero_si256()
20121        } else {
20122            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
20123            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
20124        }
20125    }
20126}
20127
20128/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20129///
20130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
20131#[inline]
20132#[target_feature(enable = "avx512f,avx512vl")]
20133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20134#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20135#[rustc_legacy_const_generics(3)]
20136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20137pub const fn _mm_mask_slli_epi32<const IMM8: u32>(
20138    src: __m128i,
20139    k: __mmask8,
20140    a: __m128i,
20141) -> __m128i {
20142    unsafe {
20143        static_assert_uimm_bits!(IMM8, 8);
20144        let r = if IMM8 >= 32 {
20145            u32x4::ZERO
20146        } else {
20147            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
20148        };
20149        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
20150    }
20151}
20152
20153/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20154///
20155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
20156#[inline]
20157#[target_feature(enable = "avx512f,avx512vl")]
20158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20159#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20160#[rustc_legacy_const_generics(2)]
20161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20162pub const fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20163    unsafe {
20164        static_assert_uimm_bits!(IMM8, 8);
20165        if IMM8 >= 32 {
20166            _mm_setzero_si128()
20167        } else {
20168            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
20169            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
20170        }
20171    }
20172}
20173
20174/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20175///
20176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
20177#[inline]
20178#[target_feature(enable = "avx512f")]
20179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20180#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20181#[rustc_legacy_const_generics(1)]
20182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20183pub const fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20184    unsafe {
20185        static_assert_uimm_bits!(IMM8, 8);
20186        if IMM8 >= 32 {
20187            _mm512_setzero_si512()
20188        } else {
20189            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
20190        }
20191    }
20192}
20193
20194/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20195///
20196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
20197#[inline]
20198#[target_feature(enable = "avx512f")]
20199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20200#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20201#[rustc_legacy_const_generics(3)]
20202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20203pub const fn _mm512_mask_srli_epi32<const IMM8: u32>(
20204    src: __m512i,
20205    k: __mmask16,
20206    a: __m512i,
20207) -> __m512i {
20208    unsafe {
20209        static_assert_uimm_bits!(IMM8, 8);
20210        let shf = if IMM8 >= 32 {
20211            u32x16::ZERO
20212        } else {
20213            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
20214        };
20215        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
20216    }
20217}
20218
20219/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
20222#[inline]
20223#[target_feature(enable = "avx512f")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20228pub const fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20229    unsafe {
20230        static_assert_uimm_bits!(IMM8, 8);
20231        if IMM8 >= 32 {
20232            _mm512_setzero_si512()
20233        } else {
20234            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
20235            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
20236        }
20237    }
20238}
20239
20240/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20241///
20242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
20243#[inline]
20244#[target_feature(enable = "avx512f,avx512vl")]
20245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20246#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20247#[rustc_legacy_const_generics(3)]
20248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20249pub const fn _mm256_mask_srli_epi32<const IMM8: u32>(
20250    src: __m256i,
20251    k: __mmask8,
20252    a: __m256i,
20253) -> __m256i {
20254    unsafe {
20255        static_assert_uimm_bits!(IMM8, 8);
20256        let r = if IMM8 >= 32 {
20257            u32x8::ZERO
20258        } else {
20259            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
20260        };
20261        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
20262    }
20263}
20264
20265/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20266///
20267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
20268#[inline]
20269#[target_feature(enable = "avx512f,avx512vl")]
20270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20271#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20272#[rustc_legacy_const_generics(2)]
20273#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20274pub const fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20275    unsafe {
20276        static_assert_uimm_bits!(IMM8, 8);
20277        if IMM8 >= 32 {
20278            _mm256_setzero_si256()
20279        } else {
20280            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
20281            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
20282        }
20283    }
20284}
20285
20286/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20287///
20288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
20289#[inline]
20290#[target_feature(enable = "avx512f,avx512vl")]
20291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20292#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20293#[rustc_legacy_const_generics(3)]
20294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20295pub const fn _mm_mask_srli_epi32<const IMM8: u32>(
20296    src: __m128i,
20297    k: __mmask8,
20298    a: __m128i,
20299) -> __m128i {
20300    unsafe {
20301        static_assert_uimm_bits!(IMM8, 8);
20302        let r = if IMM8 >= 32 {
20303            u32x4::ZERO
20304        } else {
20305            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
20306        };
20307        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
20308    }
20309}
20310
20311/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20312///
20313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
20314#[inline]
20315#[target_feature(enable = "avx512f,avx512vl")]
20316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20317#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20318#[rustc_legacy_const_generics(2)]
20319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20320pub const fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20321    unsafe {
20322        static_assert_uimm_bits!(IMM8, 8);
20323        if IMM8 >= 32 {
20324            _mm_setzero_si128()
20325        } else {
20326            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
20327            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
20328        }
20329    }
20330}
20331
20332/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20333///
20334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
20335#[inline]
20336#[target_feature(enable = "avx512f")]
20337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20338#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20339#[rustc_legacy_const_generics(1)]
20340#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20341pub const fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20342    unsafe {
20343        static_assert_uimm_bits!(IMM8, 8);
20344        if IMM8 >= 64 {
20345            _mm512_setzero_si512()
20346        } else {
20347            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20348        }
20349    }
20350}
20351
20352/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20353///
20354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
20355#[inline]
20356#[target_feature(enable = "avx512f")]
20357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20358#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20359#[rustc_legacy_const_generics(3)]
20360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20361pub const fn _mm512_mask_slli_epi64<const IMM8: u32>(
20362    src: __m512i,
20363    k: __mmask8,
20364    a: __m512i,
20365) -> __m512i {
20366    unsafe {
20367        static_assert_uimm_bits!(IMM8, 8);
20368        let shf = if IMM8 >= 64 {
20369            u64x8::ZERO
20370        } else {
20371            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20372        };
20373        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20374    }
20375}
20376
20377/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20378///
20379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20380#[inline]
20381#[target_feature(enable = "avx512f")]
20382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20383#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20384#[rustc_legacy_const_generics(2)]
20385#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20386pub const fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20387    unsafe {
20388        static_assert_uimm_bits!(IMM8, 8);
20389        if IMM8 >= 64 {
20390            _mm512_setzero_si512()
20391        } else {
20392            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20393            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20394        }
20395    }
20396}
20397
20398/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20399///
20400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20401#[inline]
20402#[target_feature(enable = "avx512f,avx512vl")]
20403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20404#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20405#[rustc_legacy_const_generics(3)]
20406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20407pub const fn _mm256_mask_slli_epi64<const IMM8: u32>(
20408    src: __m256i,
20409    k: __mmask8,
20410    a: __m256i,
20411) -> __m256i {
20412    unsafe {
20413        static_assert_uimm_bits!(IMM8, 8);
20414        let r = if IMM8 >= 64 {
20415            u64x4::ZERO
20416        } else {
20417            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20418        };
20419        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20420    }
20421}
20422
20423/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20424///
20425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20426#[inline]
20427#[target_feature(enable = "avx512f,avx512vl")]
20428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20429#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20430#[rustc_legacy_const_generics(2)]
20431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20432pub const fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20433    unsafe {
20434        static_assert_uimm_bits!(IMM8, 8);
20435        if IMM8 >= 64 {
20436            _mm256_setzero_si256()
20437        } else {
20438            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20439            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20440        }
20441    }
20442}
20443
20444/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20445///
20446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20447#[inline]
20448#[target_feature(enable = "avx512f,avx512vl")]
20449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20450#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20451#[rustc_legacy_const_generics(3)]
20452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20453pub const fn _mm_mask_slli_epi64<const IMM8: u32>(
20454    src: __m128i,
20455    k: __mmask8,
20456    a: __m128i,
20457) -> __m128i {
20458    unsafe {
20459        static_assert_uimm_bits!(IMM8, 8);
20460        let r = if IMM8 >= 64 {
20461            u64x2::ZERO
20462        } else {
20463            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20464        };
20465        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20466    }
20467}
20468
20469/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20470///
20471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20472#[inline]
20473#[target_feature(enable = "avx512f,avx512vl")]
20474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20475#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20476#[rustc_legacy_const_generics(2)]
20477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20478pub const fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20479    unsafe {
20480        static_assert_uimm_bits!(IMM8, 8);
20481        if IMM8 >= 64 {
20482            _mm_setzero_si128()
20483        } else {
20484            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20485            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20486        }
20487    }
20488}
20489
20490/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20491///
20492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20493#[inline]
20494#[target_feature(enable = "avx512f")]
20495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20496#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20497#[rustc_legacy_const_generics(1)]
20498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20499pub const fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20500    unsafe {
20501        static_assert_uimm_bits!(IMM8, 8);
20502        if IMM8 >= 64 {
20503            _mm512_setzero_si512()
20504        } else {
20505            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20506        }
20507    }
20508}
20509
20510/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20513#[inline]
20514#[target_feature(enable = "avx512f")]
20515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20516#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20517#[rustc_legacy_const_generics(3)]
20518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20519pub const fn _mm512_mask_srli_epi64<const IMM8: u32>(
20520    src: __m512i,
20521    k: __mmask8,
20522    a: __m512i,
20523) -> __m512i {
20524    unsafe {
20525        static_assert_uimm_bits!(IMM8, 8);
20526        let shf = if IMM8 >= 64 {
20527            u64x8::ZERO
20528        } else {
20529            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20530        };
20531        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20532    }
20533}
20534
20535/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20541#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20542#[rustc_legacy_const_generics(2)]
20543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20544pub const fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20545    unsafe {
20546        static_assert_uimm_bits!(IMM8, 8);
20547        if IMM8 >= 64 {
20548            _mm512_setzero_si512()
20549        } else {
20550            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20551            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20552        }
20553    }
20554}
20555
20556/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20559#[inline]
20560#[target_feature(enable = "avx512f,avx512vl")]
20561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20562#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20563#[rustc_legacy_const_generics(3)]
20564#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20565pub const fn _mm256_mask_srli_epi64<const IMM8: u32>(
20566    src: __m256i,
20567    k: __mmask8,
20568    a: __m256i,
20569) -> __m256i {
20570    unsafe {
20571        static_assert_uimm_bits!(IMM8, 8);
20572        let r = if IMM8 >= 64 {
20573            u64x4::ZERO
20574        } else {
20575            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20576        };
20577        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20578    }
20579}
20580
20581/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20582///
20583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20584#[inline]
20585#[target_feature(enable = "avx512f,avx512vl")]
20586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20587#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20588#[rustc_legacy_const_generics(2)]
20589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20590pub const fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20591    unsafe {
20592        static_assert_uimm_bits!(IMM8, 8);
20593        if IMM8 >= 64 {
20594            _mm256_setzero_si256()
20595        } else {
20596            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20597            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20598        }
20599    }
20600}
20601
20602/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20603///
20604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20605#[inline]
20606#[target_feature(enable = "avx512f,avx512vl")]
20607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20608#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20609#[rustc_legacy_const_generics(3)]
20610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20611pub const fn _mm_mask_srli_epi64<const IMM8: u32>(
20612    src: __m128i,
20613    k: __mmask8,
20614    a: __m128i,
20615) -> __m128i {
20616    unsafe {
20617        static_assert_uimm_bits!(IMM8, 8);
20618        let r = if IMM8 >= 64 {
20619            u64x2::ZERO
20620        } else {
20621            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20622        };
20623        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20624    }
20625}
20626
20627/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20628///
20629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20630#[inline]
20631#[target_feature(enable = "avx512f,avx512vl")]
20632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20633#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20634#[rustc_legacy_const_generics(2)]
20635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20636pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20637    unsafe {
20638        static_assert_uimm_bits!(IMM8, 8);
20639        if IMM8 >= 64 {
20640            _mm_setzero_si128()
20641        } else {
20642            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20643            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20644        }
20645    }
20646}
20647
20648/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20649///
20650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20651#[inline]
20652#[target_feature(enable = "avx512f")]
20653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20654#[cfg_attr(test, assert_instr(vpslld))]
20655pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20656    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20657}
20658
20659/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20660///
20661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20662#[inline]
20663#[target_feature(enable = "avx512f")]
20664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20665#[cfg_attr(test, assert_instr(vpslld))]
20666pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20667    unsafe {
20668        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20669        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20670    }
20671}
20672
20673/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20674///
20675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20676#[inline]
20677#[target_feature(enable = "avx512f")]
20678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20679#[cfg_attr(test, assert_instr(vpslld))]
20680pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20681    unsafe {
20682        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20683        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20684    }
20685}
20686
20687/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20688///
20689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20690#[inline]
20691#[target_feature(enable = "avx512f,avx512vl")]
20692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20693#[cfg_attr(test, assert_instr(vpslld))]
20694pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20695    unsafe {
20696        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20697        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20698    }
20699}
20700
20701/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20702///
20703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20704#[inline]
20705#[target_feature(enable = "avx512f,avx512vl")]
20706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20707#[cfg_attr(test, assert_instr(vpslld))]
20708pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20709    unsafe {
20710        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20711        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20712    }
20713}
20714
20715/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20716///
20717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20718#[inline]
20719#[target_feature(enable = "avx512f,avx512vl")]
20720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20721#[cfg_attr(test, assert_instr(vpslld))]
20722pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20723    unsafe {
20724        let shf = _mm_sll_epi32(a, count).as_i32x4();
20725        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20726    }
20727}
20728
20729/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20735#[cfg_attr(test, assert_instr(vpslld))]
20736pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737    unsafe {
20738        let shf = _mm_sll_epi32(a, count).as_i32x4();
20739        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20740    }
20741}
20742
20743/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20746#[inline]
20747#[target_feature(enable = "avx512f")]
20748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20749#[cfg_attr(test, assert_instr(vpsrld))]
20750pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20751    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20752}
20753
20754/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20755///
20756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20757#[inline]
20758#[target_feature(enable = "avx512f")]
20759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20760#[cfg_attr(test, assert_instr(vpsrld))]
20761pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20762    unsafe {
20763        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20764        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20765    }
20766}
20767
20768/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20769///
20770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20771#[inline]
20772#[target_feature(enable = "avx512f")]
20773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20774#[cfg_attr(test, assert_instr(vpsrld))]
20775pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20776    unsafe {
20777        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20778        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20779    }
20780}
20781
20782/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20783///
20784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20785#[inline]
20786#[target_feature(enable = "avx512f,avx512vl")]
20787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20788#[cfg_attr(test, assert_instr(vpsrld))]
20789pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20790    unsafe {
20791        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20792        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20793    }
20794}
20795
20796/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20797///
20798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20799#[inline]
20800#[target_feature(enable = "avx512f,avx512vl")]
20801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20802#[cfg_attr(test, assert_instr(vpsrld))]
20803pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20804    unsafe {
20805        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20806        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20807    }
20808}
20809
20810/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20811///
20812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20813#[inline]
20814#[target_feature(enable = "avx512f,avx512vl")]
20815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20816#[cfg_attr(test, assert_instr(vpsrld))]
20817pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20818    unsafe {
20819        let shf = _mm_srl_epi32(a, count).as_i32x4();
20820        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20821    }
20822}
20823
20824/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20825///
20826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20827#[inline]
20828#[target_feature(enable = "avx512f,avx512vl")]
20829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20830#[cfg_attr(test, assert_instr(vpsrld))]
20831pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20832    unsafe {
20833        let shf = _mm_srl_epi32(a, count).as_i32x4();
20834        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20835    }
20836}
20837
20838/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20839///
20840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20841#[inline]
20842#[target_feature(enable = "avx512f")]
20843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20844#[cfg_attr(test, assert_instr(vpsllq))]
20845pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20846    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20847}
20848
20849/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20852#[inline]
20853#[target_feature(enable = "avx512f")]
20854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20855#[cfg_attr(test, assert_instr(vpsllq))]
20856pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20857    unsafe {
20858        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20859        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20860    }
20861}
20862
20863/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20864///
20865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20866#[inline]
20867#[target_feature(enable = "avx512f")]
20868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20869#[cfg_attr(test, assert_instr(vpsllq))]
20870pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20871    unsafe {
20872        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20873        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20874    }
20875}
20876
20877/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20878///
20879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20880#[inline]
20881#[target_feature(enable = "avx512f,avx512vl")]
20882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20883#[cfg_attr(test, assert_instr(vpsllq))]
20884pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20885    unsafe {
20886        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20887        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20888    }
20889}
20890
20891/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20892///
20893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20894#[inline]
20895#[target_feature(enable = "avx512f,avx512vl")]
20896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20897#[cfg_attr(test, assert_instr(vpsllq))]
20898pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20899    unsafe {
20900        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20901        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20902    }
20903}
20904
20905/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20906///
20907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20908#[inline]
20909#[target_feature(enable = "avx512f,avx512vl")]
20910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20911#[cfg_attr(test, assert_instr(vpsllq))]
20912pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20913    unsafe {
20914        let shf = _mm_sll_epi64(a, count).as_i64x2();
20915        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20916    }
20917}
20918
20919/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20920///
20921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20922#[inline]
20923#[target_feature(enable = "avx512f,avx512vl")]
20924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20925#[cfg_attr(test, assert_instr(vpsllq))]
20926pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20927    unsafe {
20928        let shf = _mm_sll_epi64(a, count).as_i64x2();
20929        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20930    }
20931}
20932
20933/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20934///
20935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
20936#[inline]
20937#[target_feature(enable = "avx512f")]
20938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20939#[cfg_attr(test, assert_instr(vpsrlq))]
20940pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20941    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20942}
20943
20944/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20945///
20946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20947#[inline]
20948#[target_feature(enable = "avx512f")]
20949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20950#[cfg_attr(test, assert_instr(vpsrlq))]
20951pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20952    unsafe {
20953        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20954        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20955    }
20956}
20957
20958/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20961#[inline]
20962#[target_feature(enable = "avx512f")]
20963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20964#[cfg_attr(test, assert_instr(vpsrlq))]
20965pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20966    unsafe {
20967        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20968        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20969    }
20970}
20971
20972/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20973///
20974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20975#[inline]
20976#[target_feature(enable = "avx512f,avx512vl")]
20977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20978#[cfg_attr(test, assert_instr(vpsrlq))]
20979pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20980    unsafe {
20981        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20982        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20983    }
20984}
20985
20986/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20987///
20988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20989#[inline]
20990#[target_feature(enable = "avx512f,avx512vl")]
20991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20992#[cfg_attr(test, assert_instr(vpsrlq))]
20993pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20994    unsafe {
20995        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20996        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20997    }
20998}
20999
21000/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsrlq))]
21007pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21008    unsafe {
21009        let shf = _mm_srl_epi64(a, count).as_i64x2();
21010        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21011    }
21012}
21013
21014/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21015///
21016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
21017#[inline]
21018#[target_feature(enable = "avx512f,avx512vl")]
21019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21020#[cfg_attr(test, assert_instr(vpsrlq))]
21021pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21022    unsafe {
21023        let shf = _mm_srl_epi64(a, count).as_i64x2();
21024        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21025    }
21026}
21027
21028/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21029///
21030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
21031#[inline]
21032#[target_feature(enable = "avx512f")]
21033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21034#[cfg_attr(test, assert_instr(vpsrad))]
21035pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
21036    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
21037}
21038
21039/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21040///
21041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
21042#[inline]
21043#[target_feature(enable = "avx512f")]
21044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21045#[cfg_attr(test, assert_instr(vpsrad))]
21046pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21047    unsafe {
21048        let shf = _mm512_sra_epi32(a, count).as_i32x16();
21049        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21050    }
21051}
21052
21053/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21054///
21055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
21056#[inline]
21057#[target_feature(enable = "avx512f")]
21058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21059#[cfg_attr(test, assert_instr(vpsrad))]
21060pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21061    unsafe {
21062        let shf = _mm512_sra_epi32(a, count).as_i32x16();
21063        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21064    }
21065}
21066
21067/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21068///
21069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
21070#[inline]
21071#[target_feature(enable = "avx512f,avx512vl")]
21072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21073#[cfg_attr(test, assert_instr(vpsrad))]
21074pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21075    unsafe {
21076        let shf = _mm256_sra_epi32(a, count).as_i32x8();
21077        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21078    }
21079}
21080
21081/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21082///
21083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
21084#[inline]
21085#[target_feature(enable = "avx512f,avx512vl")]
21086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21087#[cfg_attr(test, assert_instr(vpsrad))]
21088pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21089    unsafe {
21090        let shf = _mm256_sra_epi32(a, count).as_i32x8();
21091        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21092    }
21093}
21094
21095/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21096///
21097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
21098#[inline]
21099#[target_feature(enable = "avx512f,avx512vl")]
21100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21101#[cfg_attr(test, assert_instr(vpsrad))]
21102pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21103    unsafe {
21104        let shf = _mm_sra_epi32(a, count).as_i32x4();
21105        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21106    }
21107}
21108
21109/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21110///
21111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
21112#[inline]
21113#[target_feature(enable = "avx512f,avx512vl")]
21114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21115#[cfg_attr(test, assert_instr(vpsrad))]
21116pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21117    unsafe {
21118        let shf = _mm_sra_epi32(a, count).as_i32x4();
21119        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21120    }
21121}
21122
21123/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21124///
21125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
21126#[inline]
21127#[target_feature(enable = "avx512f")]
21128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21129#[cfg_attr(test, assert_instr(vpsraq))]
21130pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
21131    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
21132}
21133
21134/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21135///
21136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
21137#[inline]
21138#[target_feature(enable = "avx512f")]
21139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21140#[cfg_attr(test, assert_instr(vpsraq))]
21141pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21142    unsafe {
21143        let shf = _mm512_sra_epi64(a, count).as_i64x8();
21144        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21145    }
21146}
21147
21148/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21149///
21150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
21151#[inline]
21152#[target_feature(enable = "avx512f")]
21153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21154#[cfg_attr(test, assert_instr(vpsraq))]
21155pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21156    unsafe {
21157        let shf = _mm512_sra_epi64(a, count).as_i64x8();
21158        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21159    }
21160}
21161
21162/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21163///
21164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
21165#[inline]
21166#[target_feature(enable = "avx512f,avx512vl")]
21167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21168#[cfg_attr(test, assert_instr(vpsraq))]
21169pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
21170    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
21171}
21172
21173/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21174///
21175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
21176#[inline]
21177#[target_feature(enable = "avx512f,avx512vl")]
21178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21179#[cfg_attr(test, assert_instr(vpsraq))]
21180pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21181    unsafe {
21182        let shf = _mm256_sra_epi64(a, count).as_i64x4();
21183        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21184    }
21185}
21186
21187/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21188///
21189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
21190#[inline]
21191#[target_feature(enable = "avx512f,avx512vl")]
21192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21193#[cfg_attr(test, assert_instr(vpsraq))]
21194pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21195    unsafe {
21196        let shf = _mm256_sra_epi64(a, count).as_i64x4();
21197        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21198    }
21199}
21200
21201/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21202///
21203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
21204#[inline]
21205#[target_feature(enable = "avx512f,avx512vl")]
21206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21207#[cfg_attr(test, assert_instr(vpsraq))]
21208pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
21209    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
21210}
21211
21212/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21213///
21214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
21215#[inline]
21216#[target_feature(enable = "avx512f,avx512vl")]
21217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21218#[cfg_attr(test, assert_instr(vpsraq))]
21219pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21220    unsafe {
21221        let shf = _mm_sra_epi64(a, count).as_i64x2();
21222        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21223    }
21224}
21225
21226/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21227///
21228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
21229#[inline]
21230#[target_feature(enable = "avx512f,avx512vl")]
21231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21232#[cfg_attr(test, assert_instr(vpsraq))]
21233pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21234    unsafe {
21235        let shf = _mm_sra_epi64(a, count).as_i64x2();
21236        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21237    }
21238}
21239
21240/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21241///
21242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
21243#[inline]
21244#[target_feature(enable = "avx512f")]
21245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21246#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21247#[rustc_legacy_const_generics(1)]
21248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21249pub const fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
21250    unsafe {
21251        static_assert_uimm_bits!(IMM8, 8);
21252        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
21253    }
21254}
21255
21256/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
21259#[inline]
21260#[target_feature(enable = "avx512f")]
21261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21262#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21263#[rustc_legacy_const_generics(3)]
21264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21265pub const fn _mm512_mask_srai_epi32<const IMM8: u32>(
21266    src: __m512i,
21267    k: __mmask16,
21268    a: __m512i,
21269) -> __m512i {
21270    unsafe {
21271        static_assert_uimm_bits!(IMM8, 8);
21272        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
21273        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
21274    }
21275}
21276
21277/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
21280#[inline]
21281#[target_feature(enable = "avx512f")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21284#[rustc_legacy_const_generics(2)]
21285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21286pub const fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
21287    unsafe {
21288        static_assert_uimm_bits!(IMM8, 8);
21289        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
21290        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
21291    }
21292}
21293
21294/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21295///
21296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
21297#[inline]
21298#[target_feature(enable = "avx512f,avx512vl")]
21299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21300#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21301#[rustc_legacy_const_generics(3)]
21302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21303pub const fn _mm256_mask_srai_epi32<const IMM8: u32>(
21304    src: __m256i,
21305    k: __mmask8,
21306    a: __m256i,
21307) -> __m256i {
21308    unsafe {
21309        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
21310        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
21311    }
21312}
21313
21314/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21315///
21316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
21317#[inline]
21318#[target_feature(enable = "avx512f,avx512vl")]
21319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21320#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21321#[rustc_legacy_const_generics(2)]
21322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21323pub const fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21324    unsafe {
21325        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
21326        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
21327    }
21328}
21329
21330/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21337#[rustc_legacy_const_generics(3)]
21338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21339pub const fn _mm_mask_srai_epi32<const IMM8: u32>(
21340    src: __m128i,
21341    k: __mmask8,
21342    a: __m128i,
21343) -> __m128i {
21344    unsafe {
21345        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
21346        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
21347    }
21348}
21349
21350/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21351///
21352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
21353#[inline]
21354#[target_feature(enable = "avx512f,avx512vl")]
21355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21356#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21357#[rustc_legacy_const_generics(2)]
21358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21359pub const fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21360    unsafe {
21361        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
21362        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
21363    }
21364}
21365
21366/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21367///
21368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
21369#[inline]
21370#[target_feature(enable = "avx512f")]
21371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21372#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21373#[rustc_legacy_const_generics(1)]
21374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21375pub const fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
21376    unsafe {
21377        static_assert_uimm_bits!(IMM8, 8);
21378        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
21379    }
21380}
21381
21382/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21383///
21384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
21385#[inline]
21386#[target_feature(enable = "avx512f")]
21387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21388#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21389#[rustc_legacy_const_generics(3)]
21390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21391pub const fn _mm512_mask_srai_epi64<const IMM8: u32>(
21392    src: __m512i,
21393    k: __mmask8,
21394    a: __m512i,
21395) -> __m512i {
21396    unsafe {
21397        static_assert_uimm_bits!(IMM8, 8);
21398        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
21399        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21400    }
21401}
21402
21403/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21404///
21405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
21406#[inline]
21407#[target_feature(enable = "avx512f")]
21408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21409#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21410#[rustc_legacy_const_generics(2)]
21411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21412pub const fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
21413    unsafe {
21414        static_assert_uimm_bits!(IMM8, 8);
21415        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
21416        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21417    }
21418}
21419
21420/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21421///
21422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
21423#[inline]
21424#[target_feature(enable = "avx512f,avx512vl")]
21425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21426#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21427#[rustc_legacy_const_generics(1)]
21428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21429pub const fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
21430    unsafe {
21431        static_assert_uimm_bits!(IMM8, 8);
21432        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
21433    }
21434}
21435
21436/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21437///
21438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21439#[inline]
21440#[target_feature(enable = "avx512f,avx512vl")]
21441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21442#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21443#[rustc_legacy_const_generics(3)]
21444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21445pub const fn _mm256_mask_srai_epi64<const IMM8: u32>(
21446    src: __m256i,
21447    k: __mmask8,
21448    a: __m256i,
21449) -> __m256i {
21450    unsafe {
21451        static_assert_uimm_bits!(IMM8, 8);
21452        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21453        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21454    }
21455}
21456
21457/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21458///
21459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21460#[inline]
21461#[target_feature(enable = "avx512f,avx512vl")]
21462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21463#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21464#[rustc_legacy_const_generics(2)]
21465#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21466pub const fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21467    unsafe {
21468        static_assert_uimm_bits!(IMM8, 8);
21469        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21470        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21471    }
21472}
21473
21474/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21475///
21476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21477#[inline]
21478#[target_feature(enable = "avx512f,avx512vl")]
21479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21480#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21481#[rustc_legacy_const_generics(1)]
21482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21483pub const fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21484    unsafe {
21485        static_assert_uimm_bits!(IMM8, 8);
21486        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
21487    }
21488}
21489
21490/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21496#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21497#[rustc_legacy_const_generics(3)]
21498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21499pub const fn _mm_mask_srai_epi64<const IMM8: u32>(
21500    src: __m128i,
21501    k: __mmask8,
21502    a: __m128i,
21503) -> __m128i {
21504    unsafe {
21505        static_assert_uimm_bits!(IMM8, 8);
21506        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21507        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21508    }
21509}
21510
21511/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21517#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21518#[rustc_legacy_const_generics(2)]
21519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21520pub const fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21521    unsafe {
21522        static_assert_uimm_bits!(IMM8, 8);
21523        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21524        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21525    }
21526}
21527
21528/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21529///
21530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
21531#[inline]
21532#[target_feature(enable = "avx512f")]
21533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21534#[cfg_attr(test, assert_instr(vpsravd))]
21535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21536pub const fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21537    unsafe {
21538        let count = count.as_u32x16();
21539        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
21540        let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
21541        simd_shr(a.as_i32x16(), count).as_m512i()
21542    }
21543}
21544
21545/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21546///
21547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21548#[inline]
21549#[target_feature(enable = "avx512f")]
21550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21551#[cfg_attr(test, assert_instr(vpsravd))]
21552#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21553pub const fn _mm512_mask_srav_epi32(
21554    src: __m512i,
21555    k: __mmask16,
21556    a: __m512i,
21557    count: __m512i,
21558) -> __m512i {
21559    unsafe {
21560        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21561        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21562    }
21563}
21564
21565/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21566///
21567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21568#[inline]
21569#[target_feature(enable = "avx512f")]
21570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21571#[cfg_attr(test, assert_instr(vpsravd))]
21572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21573pub const fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21574    unsafe {
21575        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21576        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21577    }
21578}
21579
21580/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21581///
21582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21583#[inline]
21584#[target_feature(enable = "avx512f,avx512vl")]
21585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21586#[cfg_attr(test, assert_instr(vpsravd))]
21587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21588pub const fn _mm256_mask_srav_epi32(
21589    src: __m256i,
21590    k: __mmask8,
21591    a: __m256i,
21592    count: __m256i,
21593) -> __m256i {
21594    unsafe {
21595        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21596        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21597    }
21598}
21599
21600/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21601///
21602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21603#[inline]
21604#[target_feature(enable = "avx512f,avx512vl")]
21605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21606#[cfg_attr(test, assert_instr(vpsravd))]
21607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21608pub const fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21609    unsafe {
21610        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21611        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21612    }
21613}
21614
21615/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21616///
21617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21618#[inline]
21619#[target_feature(enable = "avx512f,avx512vl")]
21620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21621#[cfg_attr(test, assert_instr(vpsravd))]
21622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21623pub const fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21624    unsafe {
21625        let shf = _mm_srav_epi32(a, count).as_i32x4();
21626        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21627    }
21628}
21629
21630/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21631///
21632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21633#[inline]
21634#[target_feature(enable = "avx512f,avx512vl")]
21635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21636#[cfg_attr(test, assert_instr(vpsravd))]
21637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21638pub const fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21639    unsafe {
21640        let shf = _mm_srav_epi32(a, count).as_i32x4();
21641        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21642    }
21643}
21644
21645/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21646///
21647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21648#[inline]
21649#[target_feature(enable = "avx512f")]
21650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21651#[cfg_attr(test, assert_instr(vpsravq))]
21652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21653pub const fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21654    unsafe {
21655        let count = count.as_u64x8();
21656        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
21657        let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
21658        simd_shr(a.as_i64x8(), count).as_m512i()
21659    }
21660}
21661
21662/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21663///
21664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21665#[inline]
21666#[target_feature(enable = "avx512f")]
21667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21668#[cfg_attr(test, assert_instr(vpsravq))]
21669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21670pub const fn _mm512_mask_srav_epi64(
21671    src: __m512i,
21672    k: __mmask8,
21673    a: __m512i,
21674    count: __m512i,
21675) -> __m512i {
21676    unsafe {
21677        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21678        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21679    }
21680}
21681
21682/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21683///
21684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21685#[inline]
21686#[target_feature(enable = "avx512f")]
21687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21688#[cfg_attr(test, assert_instr(vpsravq))]
21689#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21690pub const fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21691    unsafe {
21692        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21693        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21694    }
21695}
21696
21697/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21698///
21699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21700#[inline]
21701#[target_feature(enable = "avx512f,avx512vl")]
21702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21703#[cfg_attr(test, assert_instr(vpsravq))]
21704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21705pub const fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21706    unsafe {
21707        let count = count.as_u64x4();
21708        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
21709        let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
21710        simd_shr(a.as_i64x4(), count).as_m256i()
21711    }
21712}
21713
21714/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21715///
21716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21717#[inline]
21718#[target_feature(enable = "avx512f,avx512vl")]
21719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21720#[cfg_attr(test, assert_instr(vpsravq))]
21721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21722pub const fn _mm256_mask_srav_epi64(
21723    src: __m256i,
21724    k: __mmask8,
21725    a: __m256i,
21726    count: __m256i,
21727) -> __m256i {
21728    unsafe {
21729        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21730        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21731    }
21732}
21733
21734/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21735///
21736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21737#[inline]
21738#[target_feature(enable = "avx512f,avx512vl")]
21739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21740#[cfg_attr(test, assert_instr(vpsravq))]
21741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21742pub const fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21743    unsafe {
21744        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21745        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21746    }
21747}
21748
21749/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21750///
21751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21752#[inline]
21753#[target_feature(enable = "avx512f,avx512vl")]
21754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21755#[cfg_attr(test, assert_instr(vpsravq))]
21756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21757pub const fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21758    unsafe {
21759        let count = count.as_u64x2();
21760        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
21761        let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
21762        simd_shr(a.as_i64x2(), count).as_m128i()
21763    }
21764}
21765
21766/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21772#[cfg_attr(test, assert_instr(vpsravq))]
21773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21774pub const fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21775    unsafe {
21776        let shf = _mm_srav_epi64(a, count).as_i64x2();
21777        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21778    }
21779}
21780
21781/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21782///
21783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21784#[inline]
21785#[target_feature(enable = "avx512f,avx512vl")]
21786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21787#[cfg_attr(test, assert_instr(vpsravq))]
21788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21789pub const fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21790    unsafe {
21791        let shf = _mm_srav_epi64(a, count).as_i64x2();
21792        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21793    }
21794}
21795
21796/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21797///
21798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21799#[inline]
21800#[target_feature(enable = "avx512f")]
21801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21802#[cfg_attr(test, assert_instr(vprolvd))]
21803#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21804pub const fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21805    unsafe {
21806        transmute(simd_funnel_shl(
21807            a.as_u32x16(),
21808            a.as_u32x16(),
21809            simd_and(b.as_u32x16(), u32x16::splat(31)),
21810        ))
21811    }
21812}
21813
21814/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21815///
21816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21817#[inline]
21818#[target_feature(enable = "avx512f")]
21819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21820#[cfg_attr(test, assert_instr(vprolvd))]
21821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21822pub const fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21823    unsafe {
21824        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21825        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21826    }
21827}
21828
21829/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21830///
21831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21832#[inline]
21833#[target_feature(enable = "avx512f")]
21834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21835#[cfg_attr(test, assert_instr(vprolvd))]
21836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21837pub const fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21838    unsafe {
21839        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21840        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21841    }
21842}
21843
21844/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21845///
21846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21847#[inline]
21848#[target_feature(enable = "avx512f,avx512vl")]
21849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21850#[cfg_attr(test, assert_instr(vprolvd))]
21851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21852pub const fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21853    unsafe {
21854        transmute(simd_funnel_shl(
21855            a.as_u32x8(),
21856            a.as_u32x8(),
21857            simd_and(b.as_u32x8(), u32x8::splat(31)),
21858        ))
21859    }
21860}
21861
21862/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21863///
21864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21865#[inline]
21866#[target_feature(enable = "avx512f,avx512vl")]
21867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21868#[cfg_attr(test, assert_instr(vprolvd))]
21869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21870pub const fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21871    unsafe {
21872        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21873        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21874    }
21875}
21876
21877/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21878///
21879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21880#[inline]
21881#[target_feature(enable = "avx512f,avx512vl")]
21882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21883#[cfg_attr(test, assert_instr(vprolvd))]
21884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21885pub const fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21886    unsafe {
21887        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21888        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21889    }
21890}
21891
21892/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21893///
21894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21895#[inline]
21896#[target_feature(enable = "avx512f,avx512vl")]
21897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21898#[cfg_attr(test, assert_instr(vprolvd))]
21899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21900pub const fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21901    unsafe {
21902        transmute(simd_funnel_shl(
21903            a.as_u32x4(),
21904            a.as_u32x4(),
21905            simd_and(b.as_u32x4(), u32x4::splat(31)),
21906        ))
21907    }
21908}
21909
21910/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21911///
21912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21913#[inline]
21914#[target_feature(enable = "avx512f,avx512vl")]
21915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21916#[cfg_attr(test, assert_instr(vprolvd))]
21917#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21918pub const fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21919    unsafe {
21920        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21921        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21922    }
21923}
21924
21925/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21926///
21927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21928#[inline]
21929#[target_feature(enable = "avx512f,avx512vl")]
21930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21931#[cfg_attr(test, assert_instr(vprolvd))]
21932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21933pub const fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21934    unsafe {
21935        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21936        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21937    }
21938}
21939
21940/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21941///
21942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
21943#[inline]
21944#[target_feature(enable = "avx512f")]
21945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21946#[cfg_attr(test, assert_instr(vprorvd))]
21947#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21948pub const fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21949    unsafe {
21950        transmute(simd_funnel_shr(
21951            a.as_u32x16(),
21952            a.as_u32x16(),
21953            simd_and(b.as_u32x16(), u32x16::splat(31)),
21954        ))
21955    }
21956}
21957
21958/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21959///
21960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21961#[inline]
21962#[target_feature(enable = "avx512f")]
21963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21964#[cfg_attr(test, assert_instr(vprorvd))]
21965#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21966pub const fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21967    unsafe {
21968        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21969        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21970    }
21971}
21972
21973/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21974///
21975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21976#[inline]
21977#[target_feature(enable = "avx512f")]
21978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21979#[cfg_attr(test, assert_instr(vprorvd))]
21980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21981pub const fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21982    unsafe {
21983        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21984        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21985    }
21986}
21987
21988/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vprorvd))]
21995#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21996pub const fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21997    unsafe {
21998        transmute(simd_funnel_shr(
21999            a.as_u32x8(),
22000            a.as_u32x8(),
22001            simd_and(b.as_u32x8(), u32x8::splat(31)),
22002        ))
22003    }
22004}
22005
22006/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22007///
22008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
22009#[inline]
22010#[target_feature(enable = "avx512f,avx512vl")]
22011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22012#[cfg_attr(test, assert_instr(vprorvd))]
22013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22014pub const fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22015    unsafe {
22016        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
22017        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
22018    }
22019}
22020
22021/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22022///
22023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
22024#[inline]
22025#[target_feature(enable = "avx512f,avx512vl")]
22026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22027#[cfg_attr(test, assert_instr(vprorvd))]
22028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22029pub const fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22030    unsafe {
22031        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
22032        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
22033    }
22034}
22035
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 31 gives the modulo-32 count semantics of VPRORVD.
        transmute(simd_funnel_shr(
            a.as_u32x4(),
            a.as_u32x4(),
            simd_and(b.as_u32x4(), u32x4::splat(31)),
        ))
    }
}
22053
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
    }
}
22068
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
    }
}
22083
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 63 gives the modulo-64 count semantics of VPROLVQ.
        transmute(simd_funnel_shl(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}
22101
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
    }
}
22116
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
    }
}
22131
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 63 gives the modulo-64 count semantics of VPROLVQ.
        transmute(simd_funnel_shl(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}
22149
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
    }
}
22164
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
    }
}
22179
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 63 gives the modulo-64 count semantics of VPROLVQ.
        transmute(simd_funnel_shl(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}
22197
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
    }
}
22212
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
    }
}
22227
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 63 gives the modulo-64 count semantics of VPRORVQ.
        transmute(simd_funnel_shr(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}
22245
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
    }
}
22260
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
    }
}
22275
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 63 gives the modulo-64 count semantics of VPRORVQ.
        transmute(simd_funnel_shr(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}
22293
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
    }
}
22308
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
    }
}
22323
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // A funnel shift with both inputs equal to `a` is a rotate; masking each
        // per-lane count with 63 gives the modulo-64 count semantics of VPRORVQ.
        transmute(simd_funnel_shr(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}
22341
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        // Lanes with the mask bit set take the rotated value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
    }
}
22356
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        // Lanes with the mask bit set take the rotated value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
    }
}
22371
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        // Lanes whose count is >= 32 must yield 0 (VPSLLVD semantics), but such
        // counts are out of range for `simd_shl`; clamp those counts to 0 for the
        // shift and then zero the affected result lanes.
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}
22388
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sllv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        // Lanes with the mask bit set take the shifted value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}
22408
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        // Lanes with the mask bit set take the shifted value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}
22423
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sllv_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        // Lanes with the mask bit set take the shifted value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}
22443
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        // Lanes with the mask bit set take the shifted value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}
22458
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        // Lanes with the mask bit set take the shifted value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}
22473
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        // Lanes with the mask bit set take the shifted value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
22488
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        // Lanes whose count is >= 32 must yield 0 (VPSRLVD semantics), but such
        // counts are out of range for `simd_shr`; clamp those counts to 0 for the
        // shift and then zero the affected result lanes. Unsigned lanes make the
        // shift logical (zero-filling).
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}
22505
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srlv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        // Lanes with the mask bit set take the shifted value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}
22525
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        // Lanes with the mask bit set take the shifted value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}
22540
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srlv_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        // Lanes with the mask bit set take the shifted value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}
22560
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        // Lanes with the mask bit set take the shifted value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}
22575
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        // Lanes with the mask bit set take the shifted value; cleared lanes keep `src`.
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}
22590
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        // Lanes with the mask bit set take the shifted value; cleared lanes become 0.
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
22605
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        // Lanes whose count is >= 64 must yield 0 (VPSLLVQ semantics), but such
        // counts are out of range for `simd_shl`; clamp those counts to 0 for the
        // shift and then zero the affected result lanes.
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}
22622
22623/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22624///
22625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
22626#[inline]
22627#[target_feature(enable = "avx512f")]
22628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22629#[cfg_attr(test, assert_instr(vpsllvq))]
22630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22631pub const fn _mm512_mask_sllv_epi64(
22632    src: __m512i,
22633    k: __mmask8,
22634    a: __m512i,
22635    count: __m512i,
22636) -> __m512i {
22637    unsafe {
22638        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
22639        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22640    }
22641}
22642
22643/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22644///
22645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
22646#[inline]
22647#[target_feature(enable = "avx512f")]
22648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22649#[cfg_attr(test, assert_instr(vpsllvq))]
22650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22651pub const fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22652    unsafe {
22653        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
22654        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22655    }
22656}
22657
22658/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22659///
22660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
22661#[inline]
22662#[target_feature(enable = "avx512f,avx512vl")]
22663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22664#[cfg_attr(test, assert_instr(vpsllvq))]
22665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22666pub const fn _mm256_mask_sllv_epi64(
22667    src: __m256i,
22668    k: __mmask8,
22669    a: __m256i,
22670    count: __m256i,
22671) -> __m256i {
22672    unsafe {
22673        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
22674        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22675    }
22676}
22677
22678/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22679///
22680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22681#[inline]
22682#[target_feature(enable = "avx512f,avx512vl")]
22683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22684#[cfg_attr(test, assert_instr(vpsllvq))]
22685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22686pub const fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22687    unsafe {
22688        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
22689        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22690    }
22691}
22692
22693/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22694///
22695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22696#[inline]
22697#[target_feature(enable = "avx512f,avx512vl")]
22698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22699#[cfg_attr(test, assert_instr(vpsllvq))]
22700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22701pub const fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22702    unsafe {
22703        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22704        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22705    }
22706}
22707
22708/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22709///
22710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22711#[inline]
22712#[target_feature(enable = "avx512f,avx512vl")]
22713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22714#[cfg_attr(test, assert_instr(vpsllvq))]
22715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22716pub const fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22717    unsafe {
22718        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22719        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22720    }
22721}
22722
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        // Mask of lanes whose shift amount is in range (< 64); lanes with an
        // oversized count must produce 0, matching the hardware semantics.
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 first so that `simd_shr` never sees a
        // shift amount >= the element width (that would be UB); the result of
        // those lanes is discarded by the final select anyway.
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}
22739
22740/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22743#[inline]
22744#[target_feature(enable = "avx512f")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpsrlvq))]
22747#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22748pub const fn _mm512_mask_srlv_epi64(
22749    src: __m512i,
22750    k: __mmask8,
22751    a: __m512i,
22752    count: __m512i,
22753) -> __m512i {
22754    unsafe {
22755        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22756        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22757    }
22758}
22759
22760/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22761///
22762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22763#[inline]
22764#[target_feature(enable = "avx512f")]
22765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22766#[cfg_attr(test, assert_instr(vpsrlvq))]
22767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22768pub const fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22769    unsafe {
22770        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22771        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22772    }
22773}
22774
22775/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22778#[inline]
22779#[target_feature(enable = "avx512f,avx512vl")]
22780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22781#[cfg_attr(test, assert_instr(vpsrlvq))]
22782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22783pub const fn _mm256_mask_srlv_epi64(
22784    src: __m256i,
22785    k: __mmask8,
22786    a: __m256i,
22787    count: __m256i,
22788) -> __m256i {
22789    unsafe {
22790        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22791        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22792    }
22793}
22794
22795/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22796///
22797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22798#[inline]
22799#[target_feature(enable = "avx512f,avx512vl")]
22800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22801#[cfg_attr(test, assert_instr(vpsrlvq))]
22802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22803pub const fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22804    unsafe {
22805        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22806        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22807    }
22808}
22809
22810/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22811///
22812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22813#[inline]
22814#[target_feature(enable = "avx512f,avx512vl")]
22815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22816#[cfg_attr(test, assert_instr(vpsrlvq))]
22817#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22818pub const fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22819    unsafe {
22820        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22821        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22822    }
22823}
22824
22825/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22826///
22827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22828#[inline]
22829#[target_feature(enable = "avx512f,avx512vl")]
22830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22831#[cfg_attr(test, assert_instr(vpsrlvq))]
22832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22833pub const fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22834    unsafe {
22835        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22836        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22837    }
22838}
22839
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK selects one of the 4 floats inside a
        // 128-bit lane; the +4/+8/+12 offsets replay the same selection in
        // lanes 1..3 so no element crosses a lane boundary.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}
22876
22877/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22878///
22879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22880#[inline]
22881#[target_feature(enable = "avx512f")]
22882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22883#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22884#[rustc_legacy_const_generics(3)]
22885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22886pub const fn _mm512_mask_permute_ps<const MASK: i32>(
22887    src: __m512,
22888    k: __mmask16,
22889    a: __m512,
22890) -> __m512 {
22891    unsafe {
22892        static_assert_uimm_bits!(MASK, 8);
22893        let r = _mm512_permute_ps::<MASK>(a);
22894        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22895    }
22896}
22897
22898/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22899///
22900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22901#[inline]
22902#[target_feature(enable = "avx512f")]
22903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22904#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22905#[rustc_legacy_const_generics(2)]
22906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22907pub const fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22908    unsafe {
22909        static_assert_uimm_bits!(MASK, 8);
22910        let r = _mm512_permute_ps::<MASK>(a);
22911        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22912    }
22913}
22914
22915/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22916///
22917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22918#[inline]
22919#[target_feature(enable = "avx512f,avx512vl")]
22920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22921#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22922#[rustc_legacy_const_generics(3)]
22923#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22924pub const fn _mm256_mask_permute_ps<const MASK: i32>(
22925    src: __m256,
22926    k: __mmask8,
22927    a: __m256,
22928) -> __m256 {
22929    unsafe {
22930        let r = _mm256_permute_ps::<MASK>(a);
22931        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22932    }
22933}
22934
22935/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22936///
22937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22938#[inline]
22939#[target_feature(enable = "avx512f,avx512vl")]
22940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22941#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22942#[rustc_legacy_const_generics(2)]
22943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22944pub const fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22945    unsafe {
22946        let r = _mm256_permute_ps::<MASK>(a);
22947        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22948    }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22954#[inline]
22955#[target_feature(enable = "avx512f,avx512vl")]
22956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22957#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22958#[rustc_legacy_const_generics(3)]
22959#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22960pub const fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22961    unsafe {
22962        let r = _mm_permute_ps::<MASK>(a);
22963        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22964    }
22965}
22966
22967/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22970#[inline]
22971#[target_feature(enable = "avx512f,avx512vl")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22974#[rustc_legacy_const_generics(2)]
22975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22976pub const fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22977    unsafe {
22978        let r = _mm_permute_ps::<MASK>(a);
22979        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22980    }
22981}
22982
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each bit of MASK selects one of the 2 doubles inside a 128-bit
        // lane (2 bits per lane); the +2/+4/+6 offsets keep the selection
        // inside lanes 1..3 so no element crosses a lane boundary.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1),
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 2,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 4,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 6,
            ],
        )
    }
}
23011
23012/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23013///
23014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
23015#[inline]
23016#[target_feature(enable = "avx512f")]
23017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23018#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23019#[rustc_legacy_const_generics(3)]
23020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23021pub const fn _mm512_mask_permute_pd<const MASK: i32>(
23022    src: __m512d,
23023    k: __mmask8,
23024    a: __m512d,
23025) -> __m512d {
23026    unsafe {
23027        static_assert_uimm_bits!(MASK, 8);
23028        let r = _mm512_permute_pd::<MASK>(a);
23029        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23030    }
23031}
23032
23033/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23034///
23035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
23036#[inline]
23037#[target_feature(enable = "avx512f")]
23038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23039#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23040#[rustc_legacy_const_generics(2)]
23041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23042pub const fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23043    unsafe {
23044        static_assert_uimm_bits!(MASK, 8);
23045        let r = _mm512_permute_pd::<MASK>(a);
23046        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
23047    }
23048}
23049
23050/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
23053#[inline]
23054#[target_feature(enable = "avx512f,avx512vl")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23057#[rustc_legacy_const_generics(3)]
23058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23059pub const fn _mm256_mask_permute_pd<const MASK: i32>(
23060    src: __m256d,
23061    k: __mmask8,
23062    a: __m256d,
23063) -> __m256d {
23064    unsafe {
23065        static_assert_uimm_bits!(MASK, 4);
23066        let r = _mm256_permute_pd::<MASK>(a);
23067        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23068    }
23069}
23070
23071/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23072///
23073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
23074#[inline]
23075#[target_feature(enable = "avx512f,avx512vl")]
23076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23077#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23078#[rustc_legacy_const_generics(2)]
23079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23080pub const fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23081    unsafe {
23082        static_assert_uimm_bits!(MASK, 4);
23083        let r = _mm256_permute_pd::<MASK>(a);
23084        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
23085    }
23086}
23087
23088/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23089///
23090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
23091#[inline]
23092#[target_feature(enable = "avx512f,avx512vl")]
23093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23094#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23095#[rustc_legacy_const_generics(3)]
23096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23097pub const fn _mm_mask_permute_pd<const IMM2: i32>(
23098    src: __m128d,
23099    k: __mmask8,
23100    a: __m128d,
23101) -> __m128d {
23102    unsafe {
23103        static_assert_uimm_bits!(IMM2, 2);
23104        let r = _mm_permute_pd::<IMM2>(a);
23105        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
23106    }
23107}
23108
23109/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23110///
23111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
23112#[inline]
23113#[target_feature(enable = "avx512f,avx512vl")]
23114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23115#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23116#[rustc_legacy_const_generics(2)]
23117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23118pub const fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
23119    unsafe {
23120        static_assert_uimm_bits!(IMM2, 2);
23121        let r = _mm_permute_pd::<IMM2>(a);
23122        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
23123    }
23124}
23125
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 quadwords inside a
        // 256-bit lane; the +4 offsets replay the same selection in the
        // upper lane so no element crosses the 256-bit boundary.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23154
23155/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23156///
23157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
23158#[inline]
23159#[target_feature(enable = "avx512f")]
23160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23161#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23162#[rustc_legacy_const_generics(3)]
23163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23164pub const fn _mm512_mask_permutex_epi64<const MASK: i32>(
23165    src: __m512i,
23166    k: __mmask8,
23167    a: __m512i,
23168) -> __m512i {
23169    unsafe {
23170        static_assert_uimm_bits!(MASK, 8);
23171        let r = _mm512_permutex_epi64::<MASK>(a);
23172        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
23173    }
23174}
23175
23176/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23177///
23178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
23179#[inline]
23180#[target_feature(enable = "avx512f")]
23181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23182#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23183#[rustc_legacy_const_generics(2)]
23184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23185pub const fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
23186    unsafe {
23187        static_assert_uimm_bits!(MASK, 8);
23188        let r = _mm512_permutex_epi64::<MASK>(a);
23189        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
23190    }
23191}
23192
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 quadwords of `a`
        // (the vector is a single 256-bit lane here).
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23217
23218/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23219///
23220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
23221#[inline]
23222#[target_feature(enable = "avx512f,avx512vl")]
23223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23224#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23225#[rustc_legacy_const_generics(3)]
23226#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23227pub const fn _mm256_mask_permutex_epi64<const MASK: i32>(
23228    src: __m256i,
23229    k: __mmask8,
23230    a: __m256i,
23231) -> __m256i {
23232    unsafe {
23233        static_assert_uimm_bits!(MASK, 8);
23234        let r = _mm256_permutex_epi64::<MASK>(a);
23235        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
23236    }
23237}
23238
23239/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23240///
23241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
23242#[inline]
23243#[target_feature(enable = "avx512f,avx512vl")]
23244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23245#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23246#[rustc_legacy_const_generics(2)]
23247#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23248pub const fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
23249    unsafe {
23250        static_assert_uimm_bits!(MASK, 8);
23251        let r = _mm256_permutex_epi64::<MASK>(a);
23252        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
23253    }
23254}
23255
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // The same 8-bit control is applied independently to each 256-bit
        // half: each 2-bit field selects one of the four 64-bit lanes within
        // that half, hence the `+ 4` offsets for the upper half's indices.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23284
23285/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23286///
23287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
23288#[inline]
23289#[target_feature(enable = "avx512f")]
23290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23291#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23292#[rustc_legacy_const_generics(3)]
23293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23294pub const fn _mm512_mask_permutex_pd<const MASK: i32>(
23295    src: __m512d,
23296    k: __mmask8,
23297    a: __m512d,
23298) -> __m512d {
23299    unsafe {
23300        let r = _mm512_permutex_pd::<MASK>(a);
23301        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23302    }
23303}
23304
23305/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23306///
23307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
23308#[inline]
23309#[target_feature(enable = "avx512f")]
23310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23311#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23312#[rustc_legacy_const_generics(2)]
23313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23314pub const fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23315    unsafe {
23316        let r = _mm512_permutex_pd::<MASK>(a);
23317        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
23318    }
23319}
23320
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each consecutive 2-bit field of MASK (bits 1:0, 3:2, 5:4, 7:6)
        // selects which of the four 64-bit source lanes lands in the
        // corresponding destination lane.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23345
23346/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23347///
23348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
23349#[inline]
23350#[target_feature(enable = "avx512f,avx512vl")]
23351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23352#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23353#[rustc_legacy_const_generics(3)]
23354#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23355pub const fn _mm256_mask_permutex_pd<const MASK: i32>(
23356    src: __m256d,
23357    k: __mmask8,
23358    a: __m256d,
23359) -> __m256d {
23360    unsafe {
23361        static_assert_uimm_bits!(MASK, 8);
23362        let r = _mm256_permutex_pd::<MASK>(a);
23363        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23364    }
23365}
23366
23367/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23368///
23369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
23370#[inline]
23371#[target_feature(enable = "avx512f,avx512vl")]
23372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23373#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23374#[rustc_legacy_const_generics(2)]
23375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23376pub const fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23377    unsafe {
23378        static_assert_uimm_bits!(MASK, 8);
23379        let r = _mm256_permutex_pd::<MASK>(a);
23380        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
23381    }
23382}
23383
23384/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23385///
23386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
23387#[inline]
23388#[target_feature(enable = "avx512f")]
23389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23390#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23391pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
23392    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
23393}
23394
23395/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23396///
23397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
23398#[inline]
23399#[target_feature(enable = "avx512f")]
23400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23401#[cfg_attr(test, assert_instr(vpermd))]
23402pub fn _mm512_mask_permutevar_epi32(
23403    src: __m512i,
23404    k: __mmask16,
23405    idx: __m512i,
23406    a: __m512i,
23407) -> __m512i {
23408    unsafe {
23409        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
23410        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
23411    }
23412}
23413
23414/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
23415///
23416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
23417#[inline]
23418#[target_feature(enable = "avx512f")]
23419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23420#[cfg_attr(test, assert_instr(vpermilps))]
23421pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
23422    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
23423}
23424
23425/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23426///
23427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
23428#[inline]
23429#[target_feature(enable = "avx512f")]
23430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23431#[cfg_attr(test, assert_instr(vpermilps))]
23432pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23433    unsafe {
23434        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
23435        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23436    }
23437}
23438
23439/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23440///
23441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
23442#[inline]
23443#[target_feature(enable = "avx512f")]
23444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23445#[cfg_attr(test, assert_instr(vpermilps))]
23446pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23447    unsafe {
23448        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
23449        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23450    }
23451}
23452
23453/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23454///
23455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
23456#[inline]
23457#[target_feature(enable = "avx512f,avx512vl")]
23458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23459#[cfg_attr(test, assert_instr(vpermilps))]
23460pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23461    unsafe {
23462        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
23463        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23464    }
23465}
23466
23467/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23468///
23469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
23470#[inline]
23471#[target_feature(enable = "avx512f,avx512vl")]
23472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23473#[cfg_attr(test, assert_instr(vpermilps))]
23474pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23475    unsafe {
23476        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
23477        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23478    }
23479}
23480
23481/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23482///
23483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
23484#[inline]
23485#[target_feature(enable = "avx512f,avx512vl")]
23486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23487#[cfg_attr(test, assert_instr(vpermilps))]
23488pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23489    unsafe {
23490        let permute = _mm_permutevar_ps(a, b).as_f32x4();
23491        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
23492    }
23493}
23494
23495/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23496///
23497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
23498#[inline]
23499#[target_feature(enable = "avx512f,avx512vl")]
23500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23501#[cfg_attr(test, assert_instr(vpermilps))]
23502pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23503    unsafe {
23504        let permute = _mm_permutevar_ps(a, b).as_f32x4();
23505        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23506    }
23507}
23508
23509/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
23510///
23511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
23512#[inline]
23513#[target_feature(enable = "avx512f")]
23514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23515#[cfg_attr(test, assert_instr(vpermilpd))]
23516pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
23517    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
23518}
23519
23520/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23521///
23522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
23523#[inline]
23524#[target_feature(enable = "avx512f")]
23525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23526#[cfg_attr(test, assert_instr(vpermilpd))]
23527pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23528    unsafe {
23529        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
23530        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23531    }
23532}
23533
23534/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23535///
23536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
23537#[inline]
23538#[target_feature(enable = "avx512f")]
23539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23540#[cfg_attr(test, assert_instr(vpermilpd))]
23541pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23542    unsafe {
23543        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
23544        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23545    }
23546}
23547
23548/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23549///
23550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
23551#[inline]
23552#[target_feature(enable = "avx512f,avx512vl")]
23553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23554#[cfg_attr(test, assert_instr(vpermilpd))]
23555pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23556    unsafe {
23557        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
23558        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23559    }
23560}
23561
23562/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23563///
23564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
23565#[inline]
23566#[target_feature(enable = "avx512f,avx512vl")]
23567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23568#[cfg_attr(test, assert_instr(vpermilpd))]
23569pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23570    unsafe {
23571        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
23572        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23573    }
23574}
23575
23576/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23577///
23578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
23579#[inline]
23580#[target_feature(enable = "avx512f,avx512vl")]
23581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23582#[cfg_attr(test, assert_instr(vpermilpd))]
23583pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23584    unsafe {
23585        let permute = _mm_permutevar_pd(a, b).as_f64x2();
23586        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
23587    }
23588}
23589
23590/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23591///
23592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
23593#[inline]
23594#[target_feature(enable = "avx512f,avx512vl")]
23595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23596#[cfg_attr(test, assert_instr(vpermilpd))]
23597pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23598    unsafe {
23599        let permute = _mm_permutevar_pd(a, b).as_f64x2();
23600        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23601    }
23602}
23603
23604/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
23607#[inline]
23608#[target_feature(enable = "avx512f")]
23609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23610#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23611pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
23612    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
23613}
23614
23615/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23616///
23617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
23618#[inline]
23619#[target_feature(enable = "avx512f")]
23620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23621#[cfg_attr(test, assert_instr(vpermd))]
23622pub fn _mm512_mask_permutexvar_epi32(
23623    src: __m512i,
23624    k: __mmask16,
23625    idx: __m512i,
23626    a: __m512i,
23627) -> __m512i {
23628    unsafe {
23629        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23630        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
23631    }
23632}
23633
23634/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23635///
23636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
23637#[inline]
23638#[target_feature(enable = "avx512f")]
23639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23640#[cfg_attr(test, assert_instr(vpermd))]
23641pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
23642    unsafe {
23643        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23644        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23645    }
23646}
23647
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    // Delegates to the AVX2 intrinsic with identical semantics; note the
    // swapped argument order (the AVX2 form takes the data first, then idx).
    _mm256_permutevar8x32_epi32(a, idx) // llvm use llvm.x86.avx2.permd
}
23658
23659/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23660///
23661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
23662#[inline]
23663#[target_feature(enable = "avx512f,avx512vl")]
23664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23665#[cfg_attr(test, assert_instr(vpermd))]
23666pub fn _mm256_mask_permutexvar_epi32(
23667    src: __m256i,
23668    k: __mmask8,
23669    idx: __m256i,
23670    a: __m256i,
23671) -> __m256i {
23672    unsafe {
23673        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23674        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
23675    }
23676}
23677
23678/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermd))]
23685pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23686    unsafe {
23687        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23688        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23689    }
23690}
23691
23692/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
23695#[inline]
23696#[target_feature(enable = "avx512f")]
23697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23698#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
23699pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
23700    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
23701}
23702
23703/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23704///
23705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
23706#[inline]
23707#[target_feature(enable = "avx512f")]
23708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23709#[cfg_attr(test, assert_instr(vpermq))]
23710pub fn _mm512_mask_permutexvar_epi64(
23711    src: __m512i,
23712    k: __mmask8,
23713    idx: __m512i,
23714    a: __m512i,
23715) -> __m512i {
23716    unsafe {
23717        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23718        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
23719    }
23720}
23721
23722/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23723///
23724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
23725#[inline]
23726#[target_feature(enable = "avx512f")]
23727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23728#[cfg_attr(test, assert_instr(vpermq))]
23729pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
23730    unsafe {
23731        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23732        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23733    }
23734}
23735
23736/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23737///
23738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
23739#[inline]
23740#[target_feature(enable = "avx512f,avx512vl")]
23741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23742#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
23743pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
23744    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
23745}
23746
23747/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23748///
23749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
23750#[inline]
23751#[target_feature(enable = "avx512f,avx512vl")]
23752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23753#[cfg_attr(test, assert_instr(vpermq))]
23754pub fn _mm256_mask_permutexvar_epi64(
23755    src: __m256i,
23756    k: __mmask8,
23757    idx: __m256i,
23758    a: __m256i,
23759) -> __m256i {
23760    unsafe {
23761        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23762        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
23763    }
23764}
23765
23766/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23772#[cfg_attr(test, assert_instr(vpermq))]
23773pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23774    unsafe {
23775        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23776        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23777    }
23778}
23779
23780/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
23783#[inline]
23784#[target_feature(enable = "avx512f")]
23785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23786#[cfg_attr(test, assert_instr(vpermps))]
23787pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
23788    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
23789}
23790
23791/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23792///
23793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23794#[inline]
23795#[target_feature(enable = "avx512f")]
23796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23797#[cfg_attr(test, assert_instr(vpermps))]
23798pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23799    unsafe {
23800        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23801        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23802    }
23803}
23804
23805/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23806///
23807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23808#[inline]
23809#[target_feature(enable = "avx512f")]
23810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23811#[cfg_attr(test, assert_instr(vpermps))]
23812pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23813    unsafe {
23814        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23815        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23816    }
23817}
23818
23819/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
23820///
23821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
23822#[inline]
23823#[target_feature(enable = "avx512f,avx512vl")]
23824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23825#[cfg_attr(test, assert_instr(vpermps))]
23826pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
23827    _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
23828}
23829
23830/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23831///
23832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23833#[inline]
23834#[target_feature(enable = "avx512f,avx512vl")]
23835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23836#[cfg_attr(test, assert_instr(vpermps))]
23837pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23838    unsafe {
23839        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23840        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23841    }
23842}
23843
23844/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23845///
23846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23847#[inline]
23848#[target_feature(enable = "avx512f,avx512vl")]
23849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23850#[cfg_attr(test, assert_instr(vpermps))]
23851pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23852    unsafe {
23853        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23854        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23855    }
23856}
23857
23858/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23859///
23860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23861#[inline]
23862#[target_feature(enable = "avx512f")]
23863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23864#[cfg_attr(test, assert_instr(vpermpd))]
23865pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23866    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23867}
23868
23869/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23870///
23871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23872#[inline]
23873#[target_feature(enable = "avx512f")]
23874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23875#[cfg_attr(test, assert_instr(vpermpd))]
23876pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23877    unsafe {
23878        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23879        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23880    }
23881}
23882
23883/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vpermpd))]
23890pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23891    unsafe {
23892        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23893        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23894    }
23895}
23896
23897/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23898///
23899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23900#[inline]
23901#[target_feature(enable = "avx512f,avx512vl")]
23902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23903#[cfg_attr(test, assert_instr(vpermpd))]
23904pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23905    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23906}
23907
23908/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23909///
23910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23911#[inline]
23912#[target_feature(enable = "avx512f,avx512vl")]
23913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23914#[cfg_attr(test, assert_instr(vpermpd))]
23915pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23916    unsafe {
23917        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23918        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23919    }
23920}
23921
23922/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23923///
23924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23925#[inline]
23926#[target_feature(enable = "avx512f,avx512vl")]
23927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23928#[cfg_attr(test, assert_instr(vpermpd))]
23929pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23930    unsafe {
23931        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23932        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23933    }
23934}
23935
23936/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23937///
23938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23939#[inline]
23940#[target_feature(enable = "avx512f")]
23941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23942#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23943pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23944    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23945}
23946
23947/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23948///
23949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23950#[inline]
23951#[target_feature(enable = "avx512f")]
23952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23953#[cfg_attr(test, assert_instr(vpermt2d))]
23954pub fn _mm512_mask_permutex2var_epi32(
23955    a: __m512i,
23956    k: __mmask16,
23957    idx: __m512i,
23958    b: __m512i,
23959) -> __m512i {
23960    unsafe {
23961        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23962        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23963    }
23964}
23965
23966/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23967///
23968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23969#[inline]
23970#[target_feature(enable = "avx512f")]
23971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23972#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23973pub fn _mm512_maskz_permutex2var_epi32(
23974    k: __mmask16,
23975    a: __m512i,
23976    idx: __m512i,
23977    b: __m512i,
23978) -> __m512i {
23979    unsafe {
23980        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23981        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23982    }
23983}
23984
23985/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23986///
23987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23988#[inline]
23989#[target_feature(enable = "avx512f")]
23990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23991#[cfg_attr(test, assert_instr(vpermi2d))]
23992pub fn _mm512_mask2_permutex2var_epi32(
23993    a: __m512i,
23994    idx: __m512i,
23995    k: __mmask16,
23996    b: __m512i,
23997) -> __m512i {
23998    unsafe {
23999        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
24000        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
24001    }
24002}
24003
24004/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24005///
24006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
24007#[inline]
24008#[target_feature(enable = "avx512f,avx512vl")]
24009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24010#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24011pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
24012    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
24013}
24014
24015/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24016///
24017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
24018#[inline]
24019#[target_feature(enable = "avx512f,avx512vl")]
24020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24021#[cfg_attr(test, assert_instr(vpermt2d))]
24022pub fn _mm256_mask_permutex2var_epi32(
24023    a: __m256i,
24024    k: __mmask8,
24025    idx: __m256i,
24026    b: __m256i,
24027) -> __m256i {
24028    unsafe {
24029        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24030        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
24031    }
24032}
24033
24034/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24035///
24036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
24037#[inline]
24038#[target_feature(enable = "avx512f,avx512vl")]
24039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24040#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24041pub fn _mm256_maskz_permutex2var_epi32(
24042    k: __mmask8,
24043    a: __m256i,
24044    idx: __m256i,
24045    b: __m256i,
24046) -> __m256i {
24047    unsafe {
24048        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24049        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
24050    }
24051}
24052
24053/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24054///
24055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
24056#[inline]
24057#[target_feature(enable = "avx512f,avx512vl")]
24058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24059#[cfg_attr(test, assert_instr(vpermi2d))]
24060pub fn _mm256_mask2_permutex2var_epi32(
24061    a: __m256i,
24062    idx: __m256i,
24063    k: __mmask8,
24064    b: __m256i,
24065) -> __m256i {
24066    unsafe {
24067        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24068        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
24069    }
24070}
24071
24072/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24073///
24074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
24075#[inline]
24076#[target_feature(enable = "avx512f,avx512vl")]
24077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24078#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24079pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24080    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
24081}
24082
24083/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24084///
24085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
24086#[inline]
24087#[target_feature(enable = "avx512f,avx512vl")]
24088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24089#[cfg_attr(test, assert_instr(vpermt2d))]
24090pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24091    unsafe {
24092        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24093        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
24094    }
24095}
24096
24097/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24098///
24099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
24100#[inline]
24101#[target_feature(enable = "avx512f,avx512vl")]
24102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24103#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24104pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24105    unsafe {
24106        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24107        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
24108    }
24109}
24110
24111/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24112///
24113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
24114#[inline]
24115#[target_feature(enable = "avx512f,avx512vl")]
24116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24117#[cfg_attr(test, assert_instr(vpermi2d))]
24118pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24119    unsafe {
24120        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24121        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
24122    }
24123}
24124
24125/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24126///
24127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
24128#[inline]
24129#[target_feature(enable = "avx512f")]
24130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24131#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24132pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
24133    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
24134}
24135
24136/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24137///
24138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
24139#[inline]
24140#[target_feature(enable = "avx512f")]
24141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24142#[cfg_attr(test, assert_instr(vpermt2q))]
24143pub fn _mm512_mask_permutex2var_epi64(
24144    a: __m512i,
24145    k: __mmask8,
24146    idx: __m512i,
24147    b: __m512i,
24148) -> __m512i {
24149    unsafe {
24150        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24151        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
24152    }
24153}
24154
24155/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24156///
24157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
24158#[inline]
24159#[target_feature(enable = "avx512f")]
24160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24161#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24162pub fn _mm512_maskz_permutex2var_epi64(
24163    k: __mmask8,
24164    a: __m512i,
24165    idx: __m512i,
24166    b: __m512i,
24167) -> __m512i {
24168    unsafe {
24169        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24170        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
24171    }
24172}
24173
24174/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24175///
24176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
24177#[inline]
24178#[target_feature(enable = "avx512f")]
24179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24180#[cfg_attr(test, assert_instr(vpermi2q))]
24181pub fn _mm512_mask2_permutex2var_epi64(
24182    a: __m512i,
24183    idx: __m512i,
24184    k: __mmask8,
24185    b: __m512i,
24186) -> __m512i {
24187    unsafe {
24188        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24189        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
24190    }
24191}
24192
24193/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24194///
24195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
24196#[inline]
24197#[target_feature(enable = "avx512f,avx512vl")]
24198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24199#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24200pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
24201    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
24202}
24203
24204/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24205///
24206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
24207#[inline]
24208#[target_feature(enable = "avx512f,avx512vl")]
24209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24210#[cfg_attr(test, assert_instr(vpermt2q))]
24211pub fn _mm256_mask_permutex2var_epi64(
24212    a: __m256i,
24213    k: __mmask8,
24214    idx: __m256i,
24215    b: __m256i,
24216) -> __m256i {
24217    unsafe {
24218        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24219        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
24220    }
24221}
24222
24223/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24224///
24225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
24226#[inline]
24227#[target_feature(enable = "avx512f,avx512vl")]
24228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24229#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24230pub fn _mm256_maskz_permutex2var_epi64(
24231    k: __mmask8,
24232    a: __m256i,
24233    idx: __m256i,
24234    b: __m256i,
24235) -> __m256i {
24236    unsafe {
24237        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24238        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
24239    }
24240}
24241
24242/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24243///
24244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
24245#[inline]
24246#[target_feature(enable = "avx512f,avx512vl")]
24247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24248#[cfg_attr(test, assert_instr(vpermi2q))]
24249pub fn _mm256_mask2_permutex2var_epi64(
24250    a: __m256i,
24251    idx: __m256i,
24252    k: __mmask8,
24253    b: __m256i,
24254) -> __m256i {
24255    unsafe {
24256        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24257        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
24258    }
24259}
24260
24261/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24268pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24269    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
24270}
24271
24272/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24273///
24274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
24275#[inline]
24276#[target_feature(enable = "avx512f,avx512vl")]
24277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24278#[cfg_attr(test, assert_instr(vpermt2q))]
24279pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24280    unsafe {
24281        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24282        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
24283    }
24284}
24285
24286/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24287///
24288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
24289#[inline]
24290#[target_feature(enable = "avx512f,avx512vl")]
24291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24292#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24293pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24294    unsafe {
24295        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24296        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
24297    }
24298}
24299
24300/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
24303#[inline]
24304#[target_feature(enable = "avx512f,avx512vl")]
24305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24306#[cfg_attr(test, assert_instr(vpermi2q))]
24307pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24308    unsafe {
24309        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24310        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
24311    }
24312}
24313
24314/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24321pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
24322    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
24323}
24324
24325/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24326///
24327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
24328#[inline]
24329#[target_feature(enable = "avx512f")]
24330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24331#[cfg_attr(test, assert_instr(vpermt2ps))]
24332pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
24333    unsafe {
24334        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24335        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
24336    }
24337}
24338
24339/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24340///
24341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
24342#[inline]
24343#[target_feature(enable = "avx512f")]
24344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24345#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24346pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
24347    unsafe {
24348        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24349        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
24350    }
24351}
24352
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        // The "mask2" variant falls back to the raw `idx` bits reinterpreted
        // as f32 lanes (bit-cast, no numeric conversion) for unset mask bits.
        let idx = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24367
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    // Thin wrapper over the target intrinsic; `idx` semantics are as in
    // Intel's documentation linked above (selects from the concatenation
    // of `a` and `b`).
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
}
24378
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
    unsafe {
        // Permute, then merge with `a` under the writemask.
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
    }
}
24392
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe {
        // Permute, then zero lanes whose mask bit is clear.
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}
24406
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        // Fallback lanes come from `idx` bit-cast to f32 (no numeric conversion).
        let idx = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24421
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    // Thin wrapper over the target intrinsic; see Intel docs above for
    // the selector/index semantics of `idx`.
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
}
24432
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
    unsafe {
        // Permute, then merge with `a` under the writemask.
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
    }
}
24446
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe {
        // Permute, then zero lanes whose mask bit is clear.
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
    }
}
24460
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
    unsafe {
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        // Fallback lanes come from `idx` bit-cast to f32 (no numeric conversion).
        let idx = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24475
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    // Thin wrapper over the target intrinsic; see Intel docs above for
    // the selector/index semantics of `idx`.
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
}
24486
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
    unsafe {
        // Permute, then merge with `a` under the writemask.
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
    }
}
24500
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe {
        // Permute, then zero lanes whose mask bit is clear.
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}
24514
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        // Fallback lanes come from `idx` bit-cast to f64 (no numeric conversion).
        let idx = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24529
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    // Thin wrapper over the target intrinsic; see Intel docs above for
    // the selector/index semantics of `idx`.
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
}
24540
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
    unsafe {
        // Permute, then merge with `a` under the writemask.
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
    }
}
24554
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe {
        // Permute, then zero lanes whose mask bit is clear.
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}
24568
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        // Fallback lanes come from `idx` bit-cast to f64 (no numeric conversion).
        let idx = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24583
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    // Thin wrapper over the target intrinsic; see Intel docs above for
    // the selector/index semantics of `idx`.
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
}
24594
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
    unsafe {
        // Permute, then merge with `a` under the writemask.
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
    }
}
24608
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe {
        // Permute, then zero lanes whose mask bit is clear.
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
    }
}
24622
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
    unsafe {
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        // Fallback lanes come from `idx` bit-cast to f64 (no numeric conversion).
        let idx = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}
24637
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // MASK holds four 2-bit selectors (one per result position within a
        // 128-bit lane). The same selection is replicated for all four lanes,
        // with offsets +0/+4/+8/+12 addressing each lane's four elements.
        let r: i32x16 = simd_shuffle!(
            a.as_i32x16(),
            a.as_i32x16(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        );
        transmute(r)
    }
}
24675
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then merge with `src` under the writemask.
        let r = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}
24696
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero lanes whose mask bit is clear.
        let r = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}
24716
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Delegates to the AVX2 shuffle, then merges with `src` under the mask.
        let r = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}
24737
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Delegates to the AVX2 shuffle, then zeroes unset-mask lanes.
        let r = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}
24757
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Delegates to the SSE2 shuffle, then merges with `src` under the mask.
        let r = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}
24778
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Delegates to the SSE2 shuffle, then zeroes unset-mask lanes.
        let r = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}
24798
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Per 128-bit lane: the two low result elements come from `a`
        // (indices 0..16), the two high ones from `b` (indices 16..32 in the
        // shuffle's concatenated input). Offsets +4/+8/+12 (and +20/+24/+28)
        // address the four lanes.
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 16,
                ((MASK as u32 >> 6) & 0b11) + 16,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 20,
                ((MASK as u32 >> 6) & 0b11) + 20,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 24,
                ((MASK as u32 >> 6) & 0b11) + 24,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 28,
                ((MASK as u32 >> 6) & 0b11) + 28,
            ],
        )
    }
}
24835
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_ps<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then merge with `src` under the writemask.
        let r = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}
24857
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_ps<const MASK: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero lanes whose mask bit is clear.
        let r = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}
24878
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_ps<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Delegates to the AVX shuffle, then merges with `src` under the mask.
        let r = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}
24900
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Delegates to the AVX shuffle, then zeroes unset-mask lanes.
        let r = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}
24917
/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shuffle_ps<const MASK: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: lanes whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}
24939
/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every lane whose bit in `k` is clear.
        let r = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}
24956
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Even destination lanes come from `a`, odd ones from `b` (indices
        // >= 8 address `b` in the concatenated [a, b] vector). Bit i of MASK
        // selects the low or high element of the 128-bit lane that contains
        // destination element i — matching the VSHUFPD encoding.
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 8,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 10,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 12,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 14,
            ],
        )
    }
}
24985
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_pd<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: lanes whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}
25007
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_pd<const MASK: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every lane whose bit in `k` is clear.
        let r = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}
25028
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_pd<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: lanes whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}
25050
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_pd<const MASK: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every lane whose bit in `k` is clear.
        let r = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}
25071
/// Shuffle double-precision (64-bit) floating-point elements using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shuffle_pd<const MASK: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: lanes whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
    }
}
25093
/// Shuffle double-precision (64-bit) floating-point elements using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every lane whose bit in `k` is clear.
        let r = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
    }
}
25110
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Each 2-bit field of MASK picks one 128-bit lane (a group of four
        // consecutive i32 elements, hence the `* 4`). The two low destination
        // lanes are taken from `a`, the two high ones from `b` (indices
        // offset by 16 to address `b` in the concatenated [a, b] vector).
        let r: i32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25150
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: elements whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}
25172
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every element whose bit in `k` is clear.
        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}
25193
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Bit 0 of MASK selects which 128-bit lane (four i32s) of `a` forms
        // the low half of the result; bit 1 selects the lane of `b` forming
        // the high half (indices offset by 8 to address `b` in [a, b]).
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}
25225
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: elements whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}
25247
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every element whose bit in `k` is clear.
        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}
25268
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        // Each 2-bit field of MASK picks one 128-bit lane (a pair of i64
        // elements, hence the `* 2`). The two low destination lanes are taken
        // from `a`, the two high ones from `b` (indices offset by 8 to
        // address `b` in the concatenated [a, b] vector).
        let r: i64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
25300
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: elements whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}
25322
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every element whose bit in `k` is clear.
        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}
25343
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        // Bit 0 of MASK selects which 128-bit lane (pair of i64s) of `a`
        // forms the low half of the result; bit 1 selects the lane of `b`
        // forming the high half (indices offset by 4 to address `b` in [a, b]).
        let r: i64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}
25371
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: elements whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}
25393
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every element whose bit in `k` is clear.
        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}
25414
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Each 2-bit field of MASK picks one 128-bit lane (a group of four
        // consecutive f32 elements, hence the `* 4`). The two low destination
        // lanes are taken from `a`, the two high ones from `b` (indices
        // offset by 16 to address `b` in the concatenated [a, b] vector).
        let r: f32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25454
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: elements whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}
25476
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every element whose bit in `k` is clear.
        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}
25497
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        // Bit 0 of MASK selects which 128-bit lane (four f32s) of `a` forms
        // the low half of the result; bit 1 selects the lane of `b` forming
        // the high half (indices offset by 8 to address `b` in [a, b]).
        let r: f32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}
25529
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then blend per mask bit: elements whose bit
        // in `k` is set take the shuffle result, the rest are copied from `src`.
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}
25551
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Shuffle unconditionally, then zero every element whose bit in `k` is clear.
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}
25572
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        // Each 2-bit field of MASK selects one 128-bit lane (a pair of f64
        // elements, hence the `* 2` and `+ 0`/`+ 1`). Bits [1:0] and [3:2]
        // pick the low two destination lanes from `a`; bits [5:4] and [7:6]
        // pick the high two from `b` — the `+ 8` offset addresses `b`, whose
        // elements follow `a`'s in `simd_shuffle!`'s combined input.
        let r: f64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
25604
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Compute the unmasked shuffle, then blend per 64-bit lane:
        // mask bit set -> shuffled element, clear -> element from `src`.
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}
25626
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Compute the unmasked shuffle, then blend per 64-bit lane:
        // mask bit set -> shuffled element, clear -> 0.0.
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}
25647
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // FIXME: should be vshuff64x2
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        // Only the low two bits of MASK are used here: bit 0 selects the
        // 128-bit lane (pair of f64s) taken from `a` for the low half of
        // the result, bit 1 the lane taken from `b` for the high half.
        // The `+ 4` offset addresses `b`, whose elements follow `a`'s in
        // `simd_shuffle!`'s combined input.
        let r: f64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}
25675
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Compute the unmasked shuffle, then blend per 64-bit lane:
        // mask bit set -> shuffled element, clear -> element from `src`.
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}
25697
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Compute the unmasked shuffle, then blend per 64-bit lane:
        // mask bit set -> shuffled element, clear -> 0.0.
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}
25718
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // IMM8 selects one of the four 128-bit lanes of `a`. The second
        // shuffle operand is never referenced by the indices (all < 16),
        // so an undefined vector is passed as a placeholder.
        match IMM8 & 0x3 {
            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
        }
    }
}
25739
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m512,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> element from `src`.
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}
25760
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> 0.0.
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}
25777
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM8 = 1) // FIXME: should be vextractf32x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // IMM8 selects the low (0) or high (1) 128-bit lane of `a`. The
        // second shuffle operand is never referenced by the indices.
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
        }
    }
}
25799
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m256,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> element from `src`.
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}
25820
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> 0.0.
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}
25837
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf64x4, IMM1 = 1) // FIXME: should be vextracti64x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // IMM1 selects the low (0) or high (1) 256-bit half of `a`. The
        // second shuffle operand is only a placeholder — no index reaches it.
        match IMM1 {
            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
        }
    }
}
25859
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected half, then blend per 64-bit element:
        // mask bit set -> extracted element, clear -> element from `src`.
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}
25880
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected half, then blend per 64-bit element:
        // mask bit set -> extracted element, clear -> 0.
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}
25897
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // IMM8 selects the low (0) or high (1) 256-bit half of `a`. The
        // second shuffle operand is never referenced by the indices.
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
        }
    }
}
25916
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m512d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected half, then blend per 64-bit element:
        // mask bit set -> extracted element, clear -> element from `src`.
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}
25937
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the selected half, then blend per 64-bit element:
        // mask bit set -> extracted element, clear -> 0.0.
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}
25954
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf32x4, IMM2 = 3) // FIXME: should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let a = a.as_i32x16();
        // Placeholder second operand: no shuffle index reaches `zero`
        // (all indices are < 16), so its contents never appear in the result.
        let zero = i32x16::ZERO;
        // IMM2 selects one of the four 128-bit lanes of `a`.
        let extract: i32x4 = match IMM2 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
        };
        transmute(extract)
    }
}
25981
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> element from `src`.
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}
26002
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> 0.
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}
26019
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM1 = 1) // FIXME: should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let a = a.as_i32x8();
        // Placeholder second operand: no shuffle index reaches `zero`
        // (all indices are < 8), so its contents never appear in the result.
        let zero = i32x8::ZERO;
        // IMM1 selects the low (0) or high (1) 128-bit lane of `a`.
        let extract: i32x4 = match IMM1 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
        };
        transmute(extract)
    }
}
26044
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> element from `src`.
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}
26065
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        // Extract the selected lane, then blend per 32-bit element:
        // mask bit set -> extracted element, clear -> 0.
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}
26082
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_moveldup_ps(a: __m512) -> __m512 {
    unsafe {
        // Copy each even-indexed element into the odd slot that follows it:
        // dst = [a0, a0, a2, a2, ..., a14, a14].
        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
    }
}
26097
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        // Duplicate each even-indexed element into the following odd slot,
        // then blend: mask bit set -> duplicated element, clear -> `src`.
        let mov: f32x16 =
            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}
26113
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        // Duplicate each even-indexed element into the following odd slot,
        // then blend: mask bit set -> duplicated element, clear -> 0.0.
        let mov: f32x16 =
            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}
26129
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        // Reuse the AVX intrinsic for the duplication, then blend:
        // mask bit set -> duplicated element, clear -> element from `src`.
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}
26144
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        // Reuse the AVX intrinsic for the duplication, then blend:
        // mask bit set -> duplicated element, clear -> 0.0.
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}
26159
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        // Reuse the SSE3 intrinsic for the duplication, then blend:
        // mask bit set -> duplicated element, clear -> element from `src`.
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}
26174
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        // Reuse the SSE3 intrinsic for the duplication, then blend:
        // mask bit set -> duplicated element, clear -> 0.0.
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}
26189
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movehdup_ps(a: __m512) -> __m512 {
    unsafe {
        // Result lanes (2i, 2i+1) both receive source element 2i+1, i.e. each
        // odd-indexed element is broadcast over its even/odd pair.
        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
    }
}
26204
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        // Result lanes (2i, 2i+1) both receive source element 2i+1; the blend
        // then keeps `src` lanes wherever the corresponding mask bit is clear.
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}
26220
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        // Result lanes (2i, 2i+1) both receive source element 2i+1; the blend
        // then zeroes every lane whose mask bit is clear.
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}
26236
26237/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26238///
26239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
26240#[inline]
26241#[target_feature(enable = "avx512f,avx512vl")]
26242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26243#[cfg_attr(test, assert_instr(vmovshdup))]
26244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26245pub const fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26246    unsafe {
26247        let mov = _mm256_movehdup_ps(a);
26248        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
26249    }
26250}
26251
26252/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26253///
26254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
26255#[inline]
26256#[target_feature(enable = "avx512f,avx512vl")]
26257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26258#[cfg_attr(test, assert_instr(vmovshdup))]
26259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26260pub const fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
26261    unsafe {
26262        let mov = _mm256_movehdup_ps(a);
26263        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
26264    }
26265}
26266
26267/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26268///
26269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
26270#[inline]
26271#[target_feature(enable = "avx512f,avx512vl")]
26272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26273#[cfg_attr(test, assert_instr(vmovshdup))]
26274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26275pub const fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26276    unsafe {
26277        let mov = _mm_movehdup_ps(a);
26278        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
26279    }
26280}
26281
26282/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26283///
26284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
26285#[inline]
26286#[target_feature(enable = "avx512f,avx512vl")]
26287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26288#[cfg_attr(test, assert_instr(vmovshdup))]
26289#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26290pub const fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
26291    unsafe {
26292        let mov = _mm_movehdup_ps(a);
26293        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
26294    }
26295}
26296
/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movedup_pd(a: __m512d) -> __m512d {
    unsafe {
        // Result lanes (2i, 2i+1) both receive source element 2i, i.e. each
        // even-indexed element is broadcast over its even/odd pair.
        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
    }
}
26311
/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        // Result lanes (2i, 2i+1) both receive source element 2i; the blend
        // then keeps `src` lanes wherever the corresponding mask bit is clear.
        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}
26326
/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        // Result lanes (2i, 2i+1) both receive source element 2i; the blend
        // then zeroes every lane whose mask bit is clear.
        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}
26341
26342/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26348#[cfg_attr(test, assert_instr(vmovddup))]
26349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26350pub const fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
26351    unsafe {
26352        let mov = _mm256_movedup_pd(a);
26353        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
26354    }
26355}
26356
26357/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26358///
26359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
26360#[inline]
26361#[target_feature(enable = "avx512f,avx512vl")]
26362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26363#[cfg_attr(test, assert_instr(vmovddup))]
26364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26365pub const fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
26366    unsafe {
26367        let mov = _mm256_movedup_pd(a);
26368        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
26369    }
26370}
26371
26372/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26373///
26374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
26375#[inline]
26376#[target_feature(enable = "avx512f,avx512vl")]
26377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26378#[cfg_attr(test, assert_instr(vmovddup))]
26379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26380pub const fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
26381    unsafe {
26382        let mov = _mm_movedup_pd(a);
26383        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
26384    }
26385}
26386
26387/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26388///
26389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
26390#[inline]
26391#[target_feature(enable = "avx512f,avx512vl")]
26392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26393#[cfg_attr(test, assert_instr(vmovddup))]
26394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26395pub const fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
26396    unsafe {
26397        let mov = _mm_movedup_pd(a);
26398        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
26399    }
26400}
26401
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i32x16();
        // Widen `b` to 512 bits so both shuffle operands have 16 lanes; its
        // payload occupies indices 16..=19 of the concatenated shuffle input.
        let b = _mm512_castsi128_si512(b).as_i32x16();
        // IMM8 selects which 128-bit (4-lane) group of `a` is replaced by `b`.
        let ret: i32x16 = match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        };
        transmute(ret)
    }
}
26445
26446/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26447///
26448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
26449#[inline]
26450#[target_feature(enable = "avx512f")]
26451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26452#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26453#[rustc_legacy_const_generics(4)]
26454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26455pub const fn _mm512_mask_inserti32x4<const IMM8: i32>(
26456    src: __m512i,
26457    k: __mmask16,
26458    a: __m512i,
26459    b: __m128i,
26460) -> __m512i {
26461    unsafe {
26462        static_assert_uimm_bits!(IMM8, 2);
26463        let r = _mm512_inserti32x4::<IMM8>(a, b);
26464        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
26465    }
26466}
26467
26468/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26469///
26470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
26471#[inline]
26472#[target_feature(enable = "avx512f")]
26473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26474#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26475#[rustc_legacy_const_generics(3)]
26476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26477pub const fn _mm512_maskz_inserti32x4<const IMM8: i32>(
26478    k: __mmask16,
26479    a: __m512i,
26480    b: __m128i,
26481) -> __m512i {
26482    unsafe {
26483        static_assert_uimm_bits!(IMM8, 2);
26484        let r = _mm512_inserti32x4::<IMM8>(a, b);
26485        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
26486    }
26487}
26488
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x8();
        // Widen `b` to 256 bits so both shuffle operands have 8 lanes; its
        // payload occupies indices 8..=11 of the concatenated shuffle input.
        let b = _mm256_castsi128_si256(b).as_i32x8();
        // IMM8 bit 0 selects which 128-bit half of `a` is replaced by `b`.
        let ret: i32x8 = match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        };
        transmute(ret)
    }
}
26513
26514/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26515///
26516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
26517#[inline]
26518#[target_feature(enable = "avx512f,avx512vl")]
26519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26520#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26521#[rustc_legacy_const_generics(4)]
26522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26523pub const fn _mm256_mask_inserti32x4<const IMM8: i32>(
26524    src: __m256i,
26525    k: __mmask8,
26526    a: __m256i,
26527    b: __m128i,
26528) -> __m256i {
26529    unsafe {
26530        static_assert_uimm_bits!(IMM8, 1);
26531        let r = _mm256_inserti32x4::<IMM8>(a, b);
26532        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
26533    }
26534}
26535
26536/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26537///
26538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
26539#[inline]
26540#[target_feature(enable = "avx512f,avx512vl")]
26541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26542#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26543#[rustc_legacy_const_generics(3)]
26544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26545pub const fn _mm256_maskz_inserti32x4<const IMM8: i32>(
26546    k: __mmask8,
26547    a: __m256i,
26548    b: __m128i,
26549) -> __m256i {
26550    unsafe {
26551        static_assert_uimm_bits!(IMM8, 1);
26552        let r = _mm256_inserti32x4::<IMM8>(a, b);
26553        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
26554    }
26555}
26556
/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 8 lanes; its
        // payload occupies indices 8..=11 of the concatenated shuffle input.
        let b = _mm512_castsi256_si512(b);
        // IMM8 bit 0 selects which 256-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26576
26577/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26578///
26579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
26580#[inline]
26581#[target_feature(enable = "avx512f")]
26582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26583#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26584#[rustc_legacy_const_generics(4)]
26585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26586pub const fn _mm512_mask_inserti64x4<const IMM8: i32>(
26587    src: __m512i,
26588    k: __mmask8,
26589    a: __m512i,
26590    b: __m256i,
26591) -> __m512i {
26592    unsafe {
26593        static_assert_uimm_bits!(IMM8, 1);
26594        let r = _mm512_inserti64x4::<IMM8>(a, b);
26595        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
26596    }
26597}
26598
26599/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26600///
26601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
26602#[inline]
26603#[target_feature(enable = "avx512f")]
26604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26605#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26606#[rustc_legacy_const_generics(3)]
26607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26608pub const fn _mm512_maskz_inserti64x4<const IMM8: i32>(
26609    k: __mmask8,
26610    a: __m512i,
26611    b: __m256i,
26612) -> __m512i {
26613    unsafe {
26614        static_assert_uimm_bits!(IMM8, 1);
26615        let r = _mm512_inserti64x4::<IMM8>(a, b);
26616        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
26617    }
26618}
26619
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Widen `b` to 512 bits so both shuffle operands have 16 lanes; its
        // payload occupies indices 16..=19 of the concatenated shuffle input.
        let b = _mm512_castps128_ps512(b);
        // IMM8 selects which 128-bit (4-lane) group of `a` is replaced by `b`.
        match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        }
    }
}
26661
26662/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26663///
26664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
26665#[inline]
26666#[target_feature(enable = "avx512f")]
26667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26668#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26669#[rustc_legacy_const_generics(4)]
26670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26671pub const fn _mm512_mask_insertf32x4<const IMM8: i32>(
26672    src: __m512,
26673    k: __mmask16,
26674    a: __m512,
26675    b: __m128,
26676) -> __m512 {
26677    unsafe {
26678        static_assert_uimm_bits!(IMM8, 2);
26679        let r = _mm512_insertf32x4::<IMM8>(a, b);
26680        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
26681    }
26682}
26683
26684/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26685///
26686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
26687#[inline]
26688#[target_feature(enable = "avx512f")]
26689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26690#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26691#[rustc_legacy_const_generics(3)]
26692#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26693pub const fn _mm512_maskz_insertf32x4<const IMM8: i32>(
26694    k: __mmask16,
26695    a: __m512,
26696    b: __m128,
26697) -> __m512 {
26698    unsafe {
26699        static_assert_uimm_bits!(IMM8, 2);
26700        let r = _mm512_insertf32x4::<IMM8>(a, b);
26701        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
26702    }
26703}
26704
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 256 bits so both shuffle operands have 8 lanes; its
        // payload occupies indices 8..=11 of the concatenated shuffle input.
        let b = _mm256_castps128_ps256(b);
        // IMM8 bit 0 selects which 128-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26727
26728/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26729///
26730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
26731#[inline]
26732#[target_feature(enable = "avx512f,avx512vl")]
26733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26734#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26735#[rustc_legacy_const_generics(4)]
26736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26737pub const fn _mm256_mask_insertf32x4<const IMM8: i32>(
26738    src: __m256,
26739    k: __mmask8,
26740    a: __m256,
26741    b: __m128,
26742) -> __m256 {
26743    unsafe {
26744        static_assert_uimm_bits!(IMM8, 1);
26745        let r = _mm256_insertf32x4::<IMM8>(a, b);
26746        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
26747    }
26748}
26749
26750/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26751///
26752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
26753#[inline]
26754#[target_feature(enable = "avx512f,avx512vl")]
26755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26756#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26757#[rustc_legacy_const_generics(3)]
26758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26759pub const fn _mm256_maskz_insertf32x4<const IMM8: i32>(
26760    k: __mmask8,
26761    a: __m256,
26762    b: __m128,
26763) -> __m256 {
26764    unsafe {
26765        static_assert_uimm_bits!(IMM8, 1);
26766        let r = _mm256_insertf32x4::<IMM8>(a, b);
26767        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
26768    }
26769}
26770
/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 8 lanes; its
        // payload occupies indices 8..=11 of the concatenated shuffle input.
        let b = _mm512_castpd256_pd512(b);
        // IMM8 bit 0 selects which 256-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26790
26791/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26797#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
26798#[rustc_legacy_const_generics(4)]
26799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26800pub const fn _mm512_mask_insertf64x4<const IMM8: i32>(
26801    src: __m512d,
26802    k: __mmask8,
26803    a: __m512d,
26804    b: __m256d,
26805) -> __m512d {
26806    unsafe {
26807        static_assert_uimm_bits!(IMM8, 1);
26808        let r = _mm512_insertf64x4::<IMM8>(a, b);
26809        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
26810    }
26811}
26812
26813/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26814///
26815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
26816#[inline]
26817#[target_feature(enable = "avx512f")]
26818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26819#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
26820#[rustc_legacy_const_generics(3)]
26821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26822pub const fn _mm512_maskz_insertf64x4<const IMM8: i32>(
26823    k: __mmask8,
26824    a: __m512d,
26825    b: __m256d,
26826) -> __m512d {
26827    unsafe {
26828        static_assert_uimm_bits!(IMM8, 1);
26829        let r = _mm512_insertf64x4::<IMM8>(a, b);
26830        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
26831    }
26832}
26833
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Per 128-bit lane (4 elements): take elements 2,3 of a interleaved
        // with elements 2,3 of b (b's indices are offset by 16 in the
        // concatenated shuffle operand); "+ 4/8/12" shifts to lanes 1..3.
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        );
        transmute(r)
    }
}
26857
26858/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26859///
26860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
26861#[inline]
26862#[target_feature(enable = "avx512f")]
26863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26864#[cfg_attr(test, assert_instr(vpunpckhdq))]
26865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26866pub const fn _mm512_mask_unpackhi_epi32(
26867    src: __m512i,
26868    k: __mmask16,
26869    a: __m512i,
26870    b: __m512i,
26871) -> __m512i {
26872    unsafe {
26873        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
26874        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
26875    }
26876}
26877
26878/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26879///
26880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
26881#[inline]
26882#[target_feature(enable = "avx512f")]
26883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26884#[cfg_attr(test, assert_instr(vpunpckhdq))]
26885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26886pub const fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26887    unsafe {
26888        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
26889        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
26890    }
26891}
26892
26893/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26894///
26895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
26896#[inline]
26897#[target_feature(enable = "avx512f,avx512vl")]
26898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26899#[cfg_attr(test, assert_instr(vpunpckhdq))]
26900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26901pub const fn _mm256_mask_unpackhi_epi32(
26902    src: __m256i,
26903    k: __mmask8,
26904    a: __m256i,
26905    b: __m256i,
26906) -> __m256i {
26907    unsafe {
26908        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
26909        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
26910    }
26911}
26912
26913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26914///
26915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
26916#[inline]
26917#[target_feature(enable = "avx512f,avx512vl")]
26918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26919#[cfg_attr(test, assert_instr(vpunpckhdq))]
26920#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26921pub const fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26922    unsafe {
26923        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
26924        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
26925    }
26926}
26927
26928/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26929///
26930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
26931#[inline]
26932#[target_feature(enable = "avx512f,avx512vl")]
26933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26934#[cfg_attr(test, assert_instr(vpunpckhdq))]
26935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26936pub const fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26937    unsafe {
26938        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
26939        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
26940    }
26941}
26942
26943/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26944///
26945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
26946#[inline]
26947#[target_feature(enable = "avx512f,avx512vl")]
26948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26949#[cfg_attr(test, assert_instr(vpunpckhdq))]
26950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26951pub const fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26952    unsafe {
26953        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
26954        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
26955    }
26956}
26957
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Per 128-bit lane (2 elements): element 1 of a followed by element 1
    // of b (b's indices offset by 8); "+ 2/4/6" shifts to lanes 1..3.
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
26969
26970/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26971///
26972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26973#[inline]
26974#[target_feature(enable = "avx512f")]
26975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26976#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26978pub const fn _mm512_mask_unpackhi_epi64(
26979    src: __m512i,
26980    k: __mmask8,
26981    a: __m512i,
26982    b: __m512i,
26983) -> __m512i {
26984    unsafe {
26985        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26986        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26987    }
26988}
26989
26990/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26991///
26992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26993#[inline]
26994#[target_feature(enable = "avx512f")]
26995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26996#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26997#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26998pub const fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26999    unsafe {
27000        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
27001        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
27002    }
27003}
27004
27005/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27006///
27007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
27008#[inline]
27009#[target_feature(enable = "avx512f,avx512vl")]
27010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27011#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27013pub const fn _mm256_mask_unpackhi_epi64(
27014    src: __m256i,
27015    k: __mmask8,
27016    a: __m256i,
27017    b: __m256i,
27018) -> __m256i {
27019    unsafe {
27020        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
27021        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
27022    }
27023}
27024
27025/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27026///
27027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
27028#[inline]
27029#[target_feature(enable = "avx512f,avx512vl")]
27030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27031#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27033pub const fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27034    unsafe {
27035        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
27036        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
27037    }
27038}
27039
27040/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27041///
27042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
27043#[inline]
27044#[target_feature(enable = "avx512f,avx512vl")]
27045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27046#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27047#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27048pub const fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27049    unsafe {
27050        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
27051        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
27052    }
27053}
27054
27055/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27056///
27057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
27058#[inline]
27059#[target_feature(enable = "avx512f,avx512vl")]
27060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27061#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27063pub const fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27064    unsafe {
27065        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
27066        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
27067    }
27068}
27069
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Per 128-bit lane (4 elements): elements 2,3 of a interleaved with
        // elements 2,3 of b (b's indices offset by 16); "+ 4/8/12" shifts
        // the pattern to lanes 1..3.
        #[rustfmt::skip]
        simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        )
    }
}
27090
27091/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27097#[cfg_attr(test, assert_instr(vunpckhps))]
27098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27099pub const fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
27100    unsafe {
27101        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
27102        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
27103    }
27104}
27105
27106/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27107///
27108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
27109#[inline]
27110#[target_feature(enable = "avx512f")]
27111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27112#[cfg_attr(test, assert_instr(vunpckhps))]
27113#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27114pub const fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27115    unsafe {
27116        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
27117        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
27118    }
27119}
27120
27121/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27122///
27123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
27124#[inline]
27125#[target_feature(enable = "avx512f,avx512vl")]
27126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27127#[cfg_attr(test, assert_instr(vunpckhps))]
27128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27129pub const fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
27130    unsafe {
27131        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
27132        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
27133    }
27134}
27135
27136/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27137///
27138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
27139#[inline]
27140#[target_feature(enable = "avx512f,avx512vl")]
27141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27142#[cfg_attr(test, assert_instr(vunpckhps))]
27143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27144pub const fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27145    unsafe {
27146        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
27147        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
27148    }
27149}
27150
27151/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27152///
27153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
27154#[inline]
27155#[target_feature(enable = "avx512f,avx512vl")]
27156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27157#[cfg_attr(test, assert_instr(vunpckhps))]
27158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27159pub const fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
27160    unsafe {
27161        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
27162        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
27163    }
27164}
27165
27166/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
27169#[inline]
27170#[target_feature(enable = "avx512f,avx512vl")]
27171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27172#[cfg_attr(test, assert_instr(vunpckhps))]
27173#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27174pub const fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27175    unsafe {
27176        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
27177        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
27178    }
27179}
27180
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
    // Per 128-bit lane (2 elements): element 1 of a followed by element 1
    // of b (b's indices offset by 8); "+ 2/4/6" shifts to lanes 1..3.
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
27192
27193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27194///
27195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
27196#[inline]
27197#[target_feature(enable = "avx512f")]
27198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27199#[cfg_attr(test, assert_instr(vunpckhpd))]
27200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27201pub const fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27202    unsafe {
27203        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
27204        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
27205    }
27206}
27207
27208/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27209///
27210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
27211#[inline]
27212#[target_feature(enable = "avx512f")]
27213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27214#[cfg_attr(test, assert_instr(vunpckhpd))]
27215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27216pub const fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27217    unsafe {
27218        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
27219        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
27220    }
27221}
27222
27223/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27224///
27225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
27226#[inline]
27227#[target_feature(enable = "avx512f,avx512vl")]
27228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27229#[cfg_attr(test, assert_instr(vunpckhpd))]
27230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27231pub const fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27232    unsafe {
27233        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
27234        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
27235    }
27236}
27237
27238/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27239///
27240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
27241#[inline]
27242#[target_feature(enable = "avx512f,avx512vl")]
27243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27244#[cfg_attr(test, assert_instr(vunpckhpd))]
27245#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27246pub const fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27247    unsafe {
27248        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
27249        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
27250    }
27251}
27252
27253/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27259#[cfg_attr(test, assert_instr(vunpckhpd))]
27260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27261pub const fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27262    unsafe {
27263        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
27264        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
27265    }
27266}
27267
27268/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27269///
27270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
27271#[inline]
27272#[target_feature(enable = "avx512f,avx512vl")]
27273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27274#[cfg_attr(test, assert_instr(vunpckhpd))]
27275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27276pub const fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27277    unsafe {
27278        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
27279        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
27280    }
27281}
27282
/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Per 128-bit lane (4 elements): elements 0,1 of a interleaved with
        // elements 0,1 of b (b's indices offset by 16 in the concatenated
        // shuffle operand); "+ 4/8/12" shifts the pattern to lanes 1..3.
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 0, 16, 1, 17,
              0 + 4, 16 + 4, 1 + 4, 17 + 4,
              0 + 8, 16 + 8, 1 + 8, 17 + 8,
              0 + 12, 16 + 12, 1 + 12, 17 + 12],
        );
        transmute(r)
    }
}
27306
27307/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27308///
27309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
27310#[inline]
27311#[target_feature(enable = "avx512f")]
27312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27313#[cfg_attr(test, assert_instr(vpunpckldq))]
27314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27315pub const fn _mm512_mask_unpacklo_epi32(
27316    src: __m512i,
27317    k: __mmask16,
27318    a: __m512i,
27319    b: __m512i,
27320) -> __m512i {
27321    unsafe {
27322        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
27323        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
27324    }
27325}
27326
27327/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27328///
27329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
27330#[inline]
27331#[target_feature(enable = "avx512f")]
27332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27333#[cfg_attr(test, assert_instr(vpunpckldq))]
27334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27335pub const fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27336    unsafe {
27337        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
27338        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
27339    }
27340}
27341
27342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
27345#[inline]
27346#[target_feature(enable = "avx512f,avx512vl")]
27347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27348#[cfg_attr(test, assert_instr(vpunpckldq))]
27349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27350pub const fn _mm256_mask_unpacklo_epi32(
27351    src: __m256i,
27352    k: __mmask8,
27353    a: __m256i,
27354    b: __m256i,
27355) -> __m256i {
27356    unsafe {
27357        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
27358        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
27359    }
27360}
27361
27362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
27365#[inline]
27366#[target_feature(enable = "avx512f,avx512vl")]
27367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27368#[cfg_attr(test, assert_instr(vpunpckldq))]
27369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27370pub const fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27371    unsafe {
27372        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
27373        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
27374    }
27375}
27376
27377/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27378///
27379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
27380#[inline]
27381#[target_feature(enable = "avx512f,avx512vl")]
27382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27383#[cfg_attr(test, assert_instr(vpunpckldq))]
27384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27385pub const fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27386    unsafe {
27387        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
27388        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
27389    }
27390}
27391
27392/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27393///
27394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
27395#[inline]
27396#[target_feature(enable = "avx512f,avx512vl")]
27397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27398#[cfg_attr(test, assert_instr(vpunpckldq))]
27399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27400pub const fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27401    unsafe {
27402        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
27403        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
27404    }
27405}
27406
27407/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
27408///
27409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
27410#[inline]
27411#[target_feature(enable = "avx512f")]
27412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27413#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
27414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27415pub const fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
27416    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
27417}
27418
27419/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27420///
27421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
27422#[inline]
27423#[target_feature(enable = "avx512f")]
27424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27425#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27427pub const fn _mm512_mask_unpacklo_epi64(
27428    src: __m512i,
27429    k: __mmask8,
27430    a: __m512i,
27431    b: __m512i,
27432) -> __m512i {
27433    unsafe {
27434        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
27435        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
27436    }
27437}
27438
27439/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27440///
27441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
27442#[inline]
27443#[target_feature(enable = "avx512f")]
27444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27445#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27447pub const fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27448    unsafe {
27449        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
27450        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
27451    }
27452}
27453
27454/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
27457#[inline]
27458#[target_feature(enable = "avx512f,avx512vl")]
27459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27460#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27462pub const fn _mm256_mask_unpacklo_epi64(
27463    src: __m256i,
27464    k: __mmask8,
27465    a: __m256i,
27466    b: __m256i,
27467) -> __m256i {
27468    unsafe {
27469        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
27470        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
27471    }
27472}
27473
27474/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
27477#[inline]
27478#[target_feature(enable = "avx512f,avx512vl")]
27479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27480#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27482pub const fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27483    unsafe {
27484        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
27485        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
27486    }
27487}
27488
27489/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27490///
27491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
27492#[inline]
27493#[target_feature(enable = "avx512f,avx512vl")]
27494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27495#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27497pub const fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27498    unsafe {
27499        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
27500        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
27501    }
27502}
27503
27504/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27505///
27506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
27507#[inline]
27508#[target_feature(enable = "avx512f,avx512vl")]
27509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27510#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27512pub const fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27513    unsafe {
27514        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
27515        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
27516    }
27517}
27518
27519/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
27520///
27521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
27522#[inline]
27523#[target_feature(enable = "avx512f")]
27524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27525#[cfg_attr(test, assert_instr(vunpcklps))]
27526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27527pub const fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
27528    unsafe {
27529        #[rustfmt::skip]
27530        simd_shuffle!(a, b,
27531                       [ 0, 16, 1, 17,
27532                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
27533                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
27534                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
27535        )
27536    }
27537}
27538
27539/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
27542#[inline]
27543#[target_feature(enable = "avx512f")]
27544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27545#[cfg_attr(test, assert_instr(vunpcklps))]
27546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27547pub const fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
27548    unsafe {
27549        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
27550        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
27551    }
27552}
27553
27554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27555///
27556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
27557#[inline]
27558#[target_feature(enable = "avx512f")]
27559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27560#[cfg_attr(test, assert_instr(vunpcklps))]
27561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27562pub const fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27563    unsafe {
27564        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
27565        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
27566    }
27567}
27568
27569/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27570///
27571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
27572#[inline]
27573#[target_feature(enable = "avx512f,avx512vl")]
27574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27575#[cfg_attr(test, assert_instr(vunpcklps))]
27576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27577pub const fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
27578    unsafe {
27579        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
27580        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
27581    }
27582}
27583
27584/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27585///
27586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
27587#[inline]
27588#[target_feature(enable = "avx512f,avx512vl")]
27589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27590#[cfg_attr(test, assert_instr(vunpcklps))]
27591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27592pub const fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27593    unsafe {
27594        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
27595        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
27596    }
27597}
27598
27599/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27600///
27601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
27602#[inline]
27603#[target_feature(enable = "avx512f,avx512vl")]
27604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27605#[cfg_attr(test, assert_instr(vunpcklps))]
27606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27607pub const fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
27608    unsafe {
27609        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
27610        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
27611    }
27612}
27613
27614/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27615///
27616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
27617#[inline]
27618#[target_feature(enable = "avx512f,avx512vl")]
27619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27620#[cfg_attr(test, assert_instr(vunpcklps))]
27621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27622pub const fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27623    unsafe {
27624        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
27625        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
27626    }
27627}
27628
27629/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
27632#[inline]
27633#[target_feature(enable = "avx512f")]
27634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27635#[cfg_attr(test, assert_instr(vunpcklpd))]
27636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27637pub const fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
27638    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
27639}
27640
27641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27642///
27643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
27644#[inline]
27645#[target_feature(enable = "avx512f")]
27646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27647#[cfg_attr(test, assert_instr(vunpcklpd))]
27648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27649pub const fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27650    unsafe {
27651        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
27652        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
27653    }
27654}
27655
27656/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27657///
27658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
27659#[inline]
27660#[target_feature(enable = "avx512f")]
27661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27662#[cfg_attr(test, assert_instr(vunpcklpd))]
27663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27664pub const fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27665    unsafe {
27666        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
27667        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
27668    }
27669}
27670
27671/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
27674#[inline]
27675#[target_feature(enable = "avx512f,avx512vl")]
27676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27677#[cfg_attr(test, assert_instr(vunpcklpd))]
27678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27679pub const fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27680    unsafe {
27681        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
27682        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
27683    }
27684}
27685
27686/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
27689#[inline]
27690#[target_feature(enable = "avx512f,avx512vl")]
27691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27692#[cfg_attr(test, assert_instr(vunpcklpd))]
27693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27694pub const fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27695    unsafe {
27696        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
27697        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
27698    }
27699}
27700
27701/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27702///
27703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
27704#[inline]
27705#[target_feature(enable = "avx512f,avx512vl")]
27706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27707#[cfg_attr(test, assert_instr(vunpcklpd))]
27708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27709pub const fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27710    unsafe {
27711        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
27712        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
27713    }
27714}
27715
27716/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27717///
27718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
27719#[inline]
27720#[target_feature(enable = "avx512f,avx512vl")]
27721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27722#[cfg_attr(test, assert_instr(vunpcklpd))]
27723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27724pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27725    unsafe {
27726        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
27727        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
27728    }
27729}
27730
/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    unsafe {
        // Indices 0-3 keep all four lanes of `a`; index 4 (>= a's lane count)
        // selects lane 0 of the undefined second operand, filling the upper 384 bits.
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
27753
/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    unsafe {
        // Indices 0-7 keep all eight lanes of `a`; index 8 (>= a's lane count)
        // selects lane 0 of the undefined second operand, filling the upper 256 bits.
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
27776
/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
    unsafe {
        // Indices 0-3 keep all four lanes of `a`; index 4 selects lane 0 of the
        // all-zero second operand, so the upper 384 bits are guaranteed zero
        // (unlike the `cast` variant above, which leaves them indeterminate).
        simd_shuffle!(
            a,
            _mm_set1_ps(0.),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
27793
/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
    unsafe {
        // Indices 0-7 keep all eight lanes of `a`; index 8 selects lane 0 of the
        // all-zero second operand, so the upper 256 bits are guaranteed zero.
        simd_shuffle!(
            a,
            _mm256_set1_ps(0.),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
27810
/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    // Keep only the four lowest f32 lanes of `a`; the upper 384 bits are discarded.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27821
/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    // Keep only the eight lowest f32 lanes of `a`; the upper 256 bits are discarded.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
27832
/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_pd(a: __m512) -> __m512d {
    // SAFETY: `__m512` and `__m512d` are both 512-bit vector types, so this is a
    // plain bit-for-bit reinterpretation of the same storage.
    unsafe { transmute(a) }
}
27843
/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_si512(a: __m512) -> __m512i {
    // SAFETY: `__m512` and `__m512i` are both 512-bit vector types, so this is a
    // plain bit-for-bit reinterpretation of the same storage.
    unsafe { transmute(a) }
}
27854
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    // Indices 0-1 keep both lanes of `a`; index 2 (>= a's lane count) selects
    // lane 0 of the undefined second operand, filling the upper 384 bits.
    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27871
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    // Indices 0-3 keep all four lanes of `a`; index 4 (>= a's lane count) selects
    // lane 0 of the undefined second operand, filling the upper 256 bits.
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27888
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
    // Indices 0-1 keep both lanes of `a`; index 2 selects lane 0 of the all-zero
    // second operand, so the upper 384 bits are guaranteed zero.
    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27899
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
    // Indices 0-3 keep all four lanes of `a`; index 4 selects lane 0 of the
    // all-zero second operand, so the upper 256 bits are guaranteed zero.
    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27910
/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    // Keep only the two lowest f64 lanes of `a`; the upper 384 bits are discarded.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
27921
/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    // Keep only the four lowest f64 lanes of `a`; the upper 256 bits are discarded.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27932
/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    // SAFETY: plain bit-for-bit reinterpretation between two 512-bit vector types.
    unsafe { transmute(a) }
}
27943
/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    // SAFETY: plain bit-for-bit reinterpretation between two 512-bit vector types.
    unsafe { transmute(a) }
}
27954
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    // Indices 0 and 1 take `a`'s lanes; index 2 selects a lane of the
    // "undefined" second operand, leaving the upper 384 bits as some valid
    // but unspecified value (see the doc comment above).
    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27971
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    // Indices 0..=3 take `a`'s lanes; index 4 selects a lane of the
    // "undefined" second operand, leaving the upper 256 bits as some valid
    // but unspecified value (see the doc comment above).
    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27988
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    // Indices 0 and 1 take `a`'s lanes; index 2 selects a lane of the zero
    // vector, so the upper 384 bits of the result are zeroed.
    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27999
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    // Indices 0..=3 take `a`'s lanes; index 4 selects a lane of the zero
    // vector, so the upper 256 bits of the result are zeroed.
    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
28010
/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    // Truncating cast: keep only lanes 0 and 1 (the low 128 bits) of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
28021
/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    // Truncating cast: keep only lanes 0..=3 (the low 256 bits) of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
28032
/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    // SAFETY: plain bit-for-bit reinterpretation between two 512-bit vector types.
    unsafe { transmute(a) }
}
28043
/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    // SAFETY: plain bit-for-bit reinterpretation between two 512-bit vector types.
    unsafe { transmute(a) }
}
28054
/// Copy the lower 32-bit integer in a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
    // View `a` as sixteen i32 lanes and extract lane 0 (the low 32 bits).
    unsafe { simd_extract!(a.as_i32x16(), 0) }
}
28066
/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtss_f32(a: __m512) -> f32 {
    // Extract lane 0 (the lowest f32 element) of `a`.
    unsafe { simd_extract!(a, 0) }
}
28077
/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
    // Extract lane 0 (the lowest f64 element) of `a`.
    unsafe { simd_extract!(a, 0) }
}
28088
/// Broadcast the low packed 32-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
    unsafe {
        // Widen to 512 bits first so the shuffle can produce 16 i32 lanes.
        let a = _mm512_castsi128_si512(a).as_i32x16();
        // Replicate lane 0 (the low 32-bit element) into every lane.
        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
    }
}
28104
/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
    }
}
28119
/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
    }
}
28134
/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
    }
}
28149
/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
    }
}
28164
/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
    }
}
28179
/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
    }
}
28194
/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    // Replicate lane 0 (the low 64-bit element) of `a` into all eight 64-bit lanes.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}
28206
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
    }
}
28221
/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
    }
}
28236
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
    }
}
28251
/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
    }
}
28266
/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        // Only the low 2 bits of the 8-bit mask are meaningful for 2 lanes.
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
    }
}
28281
/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
    }
}
28296
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
    // Replicate lane 0 (the low f32 element) of `a` into all sixteen lanes.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
}
28308
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
    }
}
28323
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
    }
}
28338
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
    }
}
28353
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
    }
}
28368
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
    }
}
28383
/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
    }
}
28398
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
    // Replicate lane 0 (the low f64 element) of `a` into all eight lanes.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}
28410
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
    }
}
28425
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
    }
}
28440
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        // Bit i of `k` set -> broadcast lane i; clear -> lane i of `src`.
        // Only the low 4 bits of the 8-bit mask are meaningful for 4 lanes.
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
    }
}
28455
/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        // Bit i of `k` set -> broadcast lane i; clear -> zero.
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
    }
}
28470
/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        // Repeat the 4-lane pattern [0, 1, 2, 3] four times to fill 16 lanes.
        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}
28485
28486/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28487///
28488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
28489#[inline]
28490#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28493pub const fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
28494    unsafe {
28495        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
28496        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
28497    }
28498}
28499
28500/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28501///
28502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
28503#[inline]
28504#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28507pub const fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
28508    unsafe {
28509        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
28510        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
28511    }
28512}
28513
28514/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
28515///
28516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
28517#[inline]
28518#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28521pub const fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
28522    unsafe {
28523        let a = a.as_i32x4();
28524        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
28525        transmute(ret)
28526    }
28527}
28528
28529/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28530///
28531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
28532#[inline]
28533#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28536pub const fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28537    unsafe {
28538        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
28539        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
28540    }
28541}
28542
28543/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28544///
28545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
28546#[inline]
28547#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28550pub const fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
28551    unsafe {
28552        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
28553        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
28554    }
28555}
28556
28557/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
28558///
28559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
28560#[inline]
28561#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28564pub const fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
28565    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28566}
28567
28568/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28569///
28570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
28571#[inline]
28572#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28575pub const fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
28576    unsafe {
28577        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
28578        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
28579    }
28580}
28581
28582/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28583///
28584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
28585#[inline]
28586#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28589pub const fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
28590    unsafe {
28591        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
28592        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
28593    }
28594}
28595
28596/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
28597///
28598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
28599#[inline]
28600#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
28601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28602#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28603pub const fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
28604    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
28605}
28606
28607/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28608///
28609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
28610#[inline]
28611#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28614pub const fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28615    unsafe {
28616        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
28617        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
28618    }
28619}
28620
28621/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28622///
28623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
28624#[inline]
28625#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28627#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28628pub const fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
28629    unsafe {
28630        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
28631        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
28632    }
28633}
28634
28635/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
28636///
28637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
28638#[inline]
28639#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
28640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28642pub const fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
28643    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28644}
28645
28646/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28647///
28648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
28649#[inline]
28650#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28653pub const fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
28654    unsafe {
28655        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
28656        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
28657    }
28658}
28659
28660/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28661///
28662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
28663#[inline]
28664#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28667pub const fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
28668    unsafe {
28669        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
28670        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
28671    }
28672}
28673
28674/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
28675///
28676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
28677#[inline]
28678#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
28679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28681pub const fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
28682    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28683}
28684
28685/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28686///
28687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
28688#[inline]
28689#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28692pub const fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
28693    unsafe {
28694        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
28695        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
28696    }
28697}
28698
28699/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28700///
28701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
28702#[inline]
28703#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28706pub const fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
28707    unsafe {
28708        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
28709        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
28710    }
28711}
28712
28713/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28714///
28715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
28716#[inline]
28717#[target_feature(enable = "avx512f")]
28718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28719#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28721pub const fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28722    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
28723}
28724
28725/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28726///
28727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
28728#[inline]
28729#[target_feature(enable = "avx512f,avx512vl")]
28730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28731#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28733pub const fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28734    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
28735}
28736
28737/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28743#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28745pub const fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28746    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
28747}
28748
28749/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28750///
28751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
28752#[inline]
28753#[target_feature(enable = "avx512f")]
28754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28755#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28757pub const fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28758    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
28759}
28760
28761/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28762///
28763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
28764#[inline]
28765#[target_feature(enable = "avx512f,avx512vl")]
28766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28767#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28769pub const fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28770    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
28771}
28772
28773/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28774///
28775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
28776#[inline]
28777#[target_feature(enable = "avx512f,avx512vl")]
28778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28779#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28781pub const fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28782    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
28783}
28784
28785/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
28788#[inline]
28789#[target_feature(enable = "avx512f")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28793pub const fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
28794    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
28795}
28796
28797/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28798///
28799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
28800#[inline]
28801#[target_feature(enable = "avx512f,avx512vl")]
28802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28803#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28805pub const fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
28806    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
28807}
28808
28809/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28810///
28811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
28812#[inline]
28813#[target_feature(enable = "avx512f,avx512vl")]
28814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28815#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28817pub const fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
28818    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
28819}
28820
28821/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28822///
28823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
28824#[inline]
28825#[target_feature(enable = "avx512f")]
28826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28827#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28829pub const fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
28830    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
28831}
28832
28833/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28834///
28835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
28836#[inline]
28837#[target_feature(enable = "avx512f,avx512vl")]
28838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28839#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28841pub const fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
28842    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
28843}
28844
28845/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28846///
28847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
28848#[inline]
28849#[target_feature(enable = "avx512f,avx512vl")]
28850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28851#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28853pub const fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
28854    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
28855}
28856
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
///
/// <div class="warning">Only lowest <strong>4 bits</strong> are used from the mask (shift at maximum by 60 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // The assert above guarantees 0 <= IMM8 <= 255, so `%` never sees a
        // negative value; only the low 4 bits of the immediate are significant
        // (a shift by 16 elements would discard all of `b`).
        let imm8: i32 = IMM8 % 16;
        // `simd_shuffle!` requires compile-time index arrays, so each of the 16
        // possible shifts gets its own arm. Indices 0..=15 select lanes of `a`,
        // 16..=31 select lanes of `b`; a right shift by `n` therefore starts at
        // index 16 + n (lane n of `b`) and wraps into the low lanes of `a`.
        let r: i32x16 = match imm8 {
            0 => simd_shuffle!(
                a,
                b,
                [
                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                a,
                b,
                [
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
                ],
            ),
            2 => simd_shuffle!(
                a,
                b,
                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
            ),
            3 => simd_shuffle!(
                a,
                b,
                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
            ),
            4 => simd_shuffle!(
                a,
                b,
                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
            ),
            5 => simd_shuffle!(
                a,
                b,
                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
            ),
            6 => simd_shuffle!(
                a,
                b,
                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
            ),
            7 => simd_shuffle!(
                a,
                b,
                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
            ),
            8 => simd_shuffle!(
                a,
                b,
                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
            ),
            9 => simd_shuffle!(
                a,
                b,
                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
            ),
            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
            // SAFETY: `imm8` is the result of `% 16` on a non-negative value,
            // so it is always in 0..=15 and every case is covered above.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
28940
28941/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28942///
28943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
28944#[inline]
28945#[target_feature(enable = "avx512f")]
28946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28947#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28948#[rustc_legacy_const_generics(4)]
28949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28950pub const fn _mm512_mask_alignr_epi32<const IMM8: i32>(
28951    src: __m512i,
28952    k: __mmask16,
28953    a: __m512i,
28954    b: __m512i,
28955) -> __m512i {
28956    unsafe {
28957        static_assert_uimm_bits!(IMM8, 8);
28958        let r = _mm512_alignr_epi32::<IMM8>(a, b);
28959        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
28960    }
28961}
28962
28963/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28964///
28965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
28966#[inline]
28967#[target_feature(enable = "avx512f")]
28968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28969#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28970#[rustc_legacy_const_generics(3)]
28971#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28972pub const fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
28973    k: __mmask16,
28974    a: __m512i,
28975    b: __m512i,
28976) -> __m512i {
28977    unsafe {
28978        static_assert_uimm_bits!(IMM8, 8);
28979        let r = _mm512_alignr_epi32::<IMM8>(a, b);
28980        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
28981    }
28982}
28983
/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
///
/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 28 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // The assert above guarantees 0 <= IMM8 <= 255, so `%` never sees a
        // negative value; only the low 3 bits of the immediate are significant.
        let imm8: i32 = IMM8 % 8;
        // `simd_shuffle!` requires compile-time index arrays, so each of the 8
        // possible shifts gets its own arm. Indices 0..=7 select lanes of `a`,
        // 8..=15 select lanes of `b`; a right shift by `n` therefore starts at
        // index 8 + n (lane n of `b`) and wraps into the low lanes of `a`.
        let r: i32x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            // SAFETY: `imm8` is the result of `% 8` on a non-negative value,
            // so it is always in 0..=7 and every case is covered above.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29015
29016/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29017///
29018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
29019#[inline]
29020#[target_feature(enable = "avx512f,avx512vl")]
29021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29022#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29023#[rustc_legacy_const_generics(4)]
29024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29025pub const fn _mm256_mask_alignr_epi32<const IMM8: i32>(
29026    src: __m256i,
29027    k: __mmask8,
29028    a: __m256i,
29029    b: __m256i,
29030) -> __m256i {
29031    unsafe {
29032        static_assert_uimm_bits!(IMM8, 8);
29033        let r = _mm256_alignr_epi32::<IMM8>(a, b);
29034        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
29035    }
29036}
29037
29038/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29039///
29040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
29041#[inline]
29042#[target_feature(enable = "avx512f,avx512vl")]
29043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29044#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29045#[rustc_legacy_const_generics(3)]
29046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29047pub const fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
29048    k: __mmask8,
29049    a: __m256i,
29050    b: __m256i,
29051) -> __m256i {
29052    unsafe {
29053        static_assert_uimm_bits!(IMM8, 8);
29054        let r = _mm256_alignr_epi32::<IMM8>(a, b);
29055        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
29056    }
29057}
29058
/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
///
/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 12 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Only the low 2 bits of the immediate matter: the shift count wraps
        // modulo 4 (the element count of one vector), matching the warning in
        // the doc comment above.
        let imm8: i32 = IMM8 % 4;
        // In simd_shuffle!, indices 0..4 select lanes of `a` and 4..8 select
        // lanes of `b`; each arm is the concatenation [a:b] (with `a` in the
        // high half) shifted right by `imm8` 32-bit elements.
        let r: i32x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            // SAFETY: `imm8 = IMM8 % 4` with `IMM8` asserted to be an 8-bit
            // unsigned immediate, so it is always in 0..=3.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29086
29087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29088///
29089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
29090#[inline]
29091#[target_feature(enable = "avx512f,avx512vl")]
29092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29093#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29094#[rustc_legacy_const_generics(4)]
29095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29096pub const fn _mm_mask_alignr_epi32<const IMM8: i32>(
29097    src: __m128i,
29098    k: __mmask8,
29099    a: __m128i,
29100    b: __m128i,
29101) -> __m128i {
29102    unsafe {
29103        static_assert_uimm_bits!(IMM8, 8);
29104        let r = _mm_alignr_epi32::<IMM8>(a, b);
29105        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
29106    }
29107}
29108
29109/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29110///
29111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
29112#[inline]
29113#[target_feature(enable = "avx512f,avx512vl")]
29114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29115#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29116#[rustc_legacy_const_generics(3)]
29117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29118pub const fn _mm_maskz_alignr_epi32<const IMM8: i32>(
29119    k: __mmask8,
29120    a: __m128i,
29121    b: __m128i,
29122) -> __m128i {
29123    unsafe {
29124        static_assert_uimm_bits!(IMM8, 8);
29125        let r = _mm_alignr_epi32::<IMM8>(a, b);
29126        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
29127    }
29128}
29129
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
///
/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 56 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Only the low 3 bits of the immediate matter: the shift count wraps
        // modulo 8 (the element count of one vector), matching the warning in
        // the doc comment above.
        let imm8: i32 = IMM8 % 8;
        // In simd_shuffle!, indices 0..8 select lanes of `a` and 8..16 select
        // lanes of `b`; each arm is the concatenation [a:b] (with `a` in the
        // high half) shifted right by `imm8` 64-bit elements.
        let r: i64x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            // SAFETY: `imm8 = IMM8 % 8` with `IMM8` asserted to be an 8-bit
            // unsigned immediate, so it is always in 0..=7.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29159
29160/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29161///
29162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
29163#[inline]
29164#[target_feature(enable = "avx512f")]
29165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29166#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29167#[rustc_legacy_const_generics(4)]
29168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29169pub const fn _mm512_mask_alignr_epi64<const IMM8: i32>(
29170    src: __m512i,
29171    k: __mmask8,
29172    a: __m512i,
29173    b: __m512i,
29174) -> __m512i {
29175    unsafe {
29176        static_assert_uimm_bits!(IMM8, 8);
29177        let r = _mm512_alignr_epi64::<IMM8>(a, b);
29178        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
29179    }
29180}
29181
29182/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29183///
29184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
29185#[inline]
29186#[target_feature(enable = "avx512f")]
29187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29188#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29189#[rustc_legacy_const_generics(3)]
29190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29191pub const fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
29192    k: __mmask8,
29193    a: __m512i,
29194    b: __m512i,
29195) -> __m512i {
29196    unsafe {
29197        static_assert_uimm_bits!(IMM8, 8);
29198        let r = _mm512_alignr_epi64::<IMM8>(a, b);
29199        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
29200    }
29201}
29202
/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
///
/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 24 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Only the low 2 bits of the immediate matter: the shift count wraps
        // modulo 4 (the element count of one vector), matching the warning in
        // the doc comment above.
        let imm8: i32 = IMM8 % 4;
        // In simd_shuffle!, indices 0..4 select lanes of `a` and 4..8 select
        // lanes of `b`; each arm is the concatenation [a:b] (with `a` in the
        // high half) shifted right by `imm8` 64-bit elements.
        let r: i64x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            // SAFETY: `imm8 = IMM8 % 4` with `IMM8` asserted to be an 8-bit
            // unsigned immediate, so it is always in 0..=3.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29228
29229/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29230///
29231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
29232#[inline]
29233#[target_feature(enable = "avx512f,avx512vl")]
29234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29235#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29236#[rustc_legacy_const_generics(4)]
29237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29238pub const fn _mm256_mask_alignr_epi64<const IMM8: i32>(
29239    src: __m256i,
29240    k: __mmask8,
29241    a: __m256i,
29242    b: __m256i,
29243) -> __m256i {
29244    unsafe {
29245        static_assert_uimm_bits!(IMM8, 8);
29246        let r = _mm256_alignr_epi64::<IMM8>(a, b);
29247        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
29248    }
29249}
29250
29251/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
29254#[inline]
29255#[target_feature(enable = "avx512f,avx512vl")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29258#[rustc_legacy_const_generics(3)]
29259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29260pub const fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
29261    k: __mmask8,
29262    a: __m256i,
29263    b: __m256i,
29264) -> __m256i {
29265    unsafe {
29266        static_assert_uimm_bits!(IMM8, 8);
29267        let r = _mm256_alignr_epi64::<IMM8>(a, b);
29268        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
29269    }
29270}
29271
/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
///
/// <div class="warning">Only lowest <strong>bit</strong> is used from the mask (shift at maximum by 8 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Only the low bit of the immediate matters: the shift count wraps
        // modulo 2 (the element count of one vector), matching the warning in
        // the doc comment above.
        let imm8: i32 = IMM8 % 2;
        // In simd_shuffle!, indices 0..2 select lanes of `a` and 2..4 select
        // lanes of `b`; each arm is the concatenation [a:b] (with `a` in the
        // high half) shifted right by `imm8` 64-bit elements.
        let r: i64x2 = match imm8 {
            0 => simd_shuffle!(a, b, [2, 3]),
            1 => simd_shuffle!(a, b, [3, 0]),
            // SAFETY: `imm8 = IMM8 % 2` with `IMM8` asserted to be an 8-bit
            // unsigned immediate, so it is always 0 or 1.
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
29295
29296/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29297///
29298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
29299#[inline]
29300#[target_feature(enable = "avx512f,avx512vl")]
29301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29302#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29303#[rustc_legacy_const_generics(4)]
29304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29305pub const fn _mm_mask_alignr_epi64<const IMM8: i32>(
29306    src: __m128i,
29307    k: __mmask8,
29308    a: __m128i,
29309    b: __m128i,
29310) -> __m128i {
29311    unsafe {
29312        static_assert_uimm_bits!(IMM8, 8);
29313        let r = _mm_alignr_epi64::<IMM8>(a, b);
29314        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
29315    }
29316}
29317
29318/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
29321#[inline]
29322#[target_feature(enable = "avx512f,avx512vl")]
29323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29324#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29325#[rustc_legacy_const_generics(3)]
29326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29327pub const fn _mm_maskz_alignr_epi64<const IMM8: i32>(
29328    k: __mmask8,
29329    a: __m128i,
29330    b: __m128i,
29331) -> __m128i {
29332    unsafe {
29333        static_assert_uimm_bits!(IMM8, 8);
29334        let r = _mm_alignr_epi64::<IMM8>(a, b);
29335        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
29336    }
29337}
29338
29339/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
29340///
29341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
29342#[inline]
29343#[target_feature(enable = "avx512f")]
29344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29345#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
29346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29347pub const fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
29348    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
29349}
29350
29351/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vpandd))]
29358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29359pub const fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29360    unsafe {
29361        let and = _mm512_and_epi32(a, b).as_i32x16();
29362        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
29363    }
29364}
29365
29366/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29367///
29368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
29369#[inline]
29370#[target_feature(enable = "avx512f")]
29371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29372#[cfg_attr(test, assert_instr(vpandd))]
29373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29374pub const fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29375    unsafe {
29376        let and = _mm512_and_epi32(a, b).as_i32x16();
29377        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
29378    }
29379}
29380
29381/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29382///
29383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
29384#[inline]
29385#[target_feature(enable = "avx512f,avx512vl")]
29386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29387#[cfg_attr(test, assert_instr(vpandd))]
29388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29389pub const fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29390    unsafe {
29391        let and = simd_and(a.as_i32x8(), b.as_i32x8());
29392        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
29393    }
29394}
29395
29396/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
29399#[inline]
29400#[target_feature(enable = "avx512f,avx512vl")]
29401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29402#[cfg_attr(test, assert_instr(vpandd))]
29403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29404pub const fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29405    unsafe {
29406        let and = simd_and(a.as_i32x8(), b.as_i32x8());
29407        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
29408    }
29409}
29410
29411/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29412///
29413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
29414#[inline]
29415#[target_feature(enable = "avx512f,avx512vl")]
29416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29417#[cfg_attr(test, assert_instr(vpandd))]
29418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29419pub const fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29420    unsafe {
29421        let and = simd_and(a.as_i32x4(), b.as_i32x4());
29422        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
29423    }
29424}
29425
29426/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29427///
29428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
29429#[inline]
29430#[target_feature(enable = "avx512f,avx512vl")]
29431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29432#[cfg_attr(test, assert_instr(vpandd))]
29433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29434pub const fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29435    unsafe {
29436        let and = simd_and(a.as_i32x4(), b.as_i32x4());
29437        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
29438    }
29439}
29440
29441/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
29442///
29443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
29444#[inline]
29445#[target_feature(enable = "avx512f")]
29446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29447#[cfg_attr(test, assert_instr(vpandq))]
29448#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29449pub const fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
29450    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
29451}
29452
29453/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29454///
29455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
29456#[inline]
29457#[target_feature(enable = "avx512f")]
29458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29459#[cfg_attr(test, assert_instr(vpandq))]
29460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29461pub const fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29462    unsafe {
29463        let and = _mm512_and_epi64(a, b).as_i64x8();
29464        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
29465    }
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
29471#[inline]
29472#[target_feature(enable = "avx512f")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vpandq))]
29475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29476pub const fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29477    unsafe {
29478        let and = _mm512_and_epi64(a, b).as_i64x8();
29479        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
29480    }
29481}
29482
29483/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29484///
29485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
29486#[inline]
29487#[target_feature(enable = "avx512f,avx512vl")]
29488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29489#[cfg_attr(test, assert_instr(vpandq))]
29490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29491pub const fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29492    unsafe {
29493        let and = simd_and(a.as_i64x4(), b.as_i64x4());
29494        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
29495    }
29496}
29497
29498/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29499///
29500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
29501#[inline]
29502#[target_feature(enable = "avx512f,avx512vl")]
29503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29504#[cfg_attr(test, assert_instr(vpandq))]
29505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29506pub const fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29507    unsafe {
29508        let and = simd_and(a.as_i64x4(), b.as_i64x4());
29509        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
29510    }
29511}
29512
29513/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29519#[cfg_attr(test, assert_instr(vpandq))]
29520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29521pub const fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29522    unsafe {
29523        let and = simd_and(a.as_i64x2(), b.as_i64x2());
29524        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
29525    }
29526}
29527
29528/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29529///
29530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
29531#[inline]
29532#[target_feature(enable = "avx512f,avx512vl")]
29533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29534#[cfg_attr(test, assert_instr(vpandq))]
29535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29536pub const fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29537    unsafe {
29538        let and = simd_and(a.as_i64x2(), b.as_i64x2());
29539        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
29540    }
29541}
29542
29543/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
29544///
29545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
29546#[inline]
29547#[target_feature(enable = "avx512f")]
29548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29549#[cfg_attr(test, assert_instr(vpandq))]
29550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29551pub const fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
29552    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
29553}
29554
29555/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29556///
29557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
29558#[inline]
29559#[target_feature(enable = "avx512f")]
29560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29561#[cfg_attr(test, assert_instr(vporq))]
29562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29563pub const fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
29564    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
29565}
29566
29567/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29568///
29569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
29570#[inline]
29571#[target_feature(enable = "avx512f")]
29572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29573#[cfg_attr(test, assert_instr(vpord))]
29574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29575pub const fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29576    unsafe {
29577        let or = _mm512_or_epi32(a, b).as_i32x16();
29578        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
29579    }
29580}
29581
29582/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29583///
29584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
29585#[inline]
29586#[target_feature(enable = "avx512f")]
29587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29588#[cfg_attr(test, assert_instr(vpord))]
29589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29590pub const fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29591    unsafe {
29592        let or = _mm512_or_epi32(a, b).as_i32x16();
29593        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
29594    }
29595}
29596
29597/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29598///
29599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
29600#[inline]
29601#[target_feature(enable = "avx512f,avx512vl")]
29602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29603#[cfg_attr(test, assert_instr(vor))] //should be vpord
29604#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29605pub const fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
29606    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
29607}
29608
29609/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29610///
29611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
29612#[inline]
29613#[target_feature(enable = "avx512f,avx512vl")]
29614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29615#[cfg_attr(test, assert_instr(vpord))]
29616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29617pub const fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29618    unsafe {
29619        let or = _mm256_or_epi32(a, b).as_i32x8();
29620        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
29621    }
29622}
29623
29624/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vpord))]
29631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29632pub const fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29633    unsafe {
29634        let or = _mm256_or_epi32(a, b).as_i32x8();
29635        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
29636    }
29637}
29638
29639/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29640///
29641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
29642#[inline]
29643#[target_feature(enable = "avx512f,avx512vl")]
29644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29645#[cfg_attr(test, assert_instr(vor))] //should be vpord
29646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29647pub const fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
29648    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
29649}
29650
29651/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29652///
29653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
29654#[inline]
29655#[target_feature(enable = "avx512f,avx512vl")]
29656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29657#[cfg_attr(test, assert_instr(vpord))]
29658#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29659pub const fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29660    unsafe {
29661        let or = _mm_or_epi32(a, b).as_i32x4();
29662        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
29663    }
29664}
29665
29666/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29667///
29668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
29669#[inline]
29670#[target_feature(enable = "avx512f,avx512vl")]
29671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29672#[cfg_attr(test, assert_instr(vpord))]
29673#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29674pub const fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29675    unsafe {
29676        let or = _mm_or_epi32(a, b).as_i32x4();
29677        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
29678    }
29679}
29680
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Bitwise OR is lane-size agnostic; the i64x8 view matches the intrinsic name.
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
}
29692
29693/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29694///
29695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
29696#[inline]
29697#[target_feature(enable = "avx512f")]
29698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29699#[cfg_attr(test, assert_instr(vporq))]
29700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29701pub const fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29702    unsafe {
29703        let or = _mm512_or_epi64(a, b).as_i64x8();
29704        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
29705    }
29706}
29707
29708/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29709///
29710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
29711#[inline]
29712#[target_feature(enable = "avx512f")]
29713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29714#[cfg_attr(test, assert_instr(vporq))]
29715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29716pub const fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29717    unsafe {
29718        let or = _mm512_or_epi64(a, b).as_i64x8();
29719        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
29720    }
29721}
29722
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vporq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    // Bitwise OR is lane-size agnostic; the i64x4 view matches the intrinsic name.
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
}
29734
29735/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29736///
29737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
29738#[inline]
29739#[target_feature(enable = "avx512f,avx512vl")]
29740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29741#[cfg_attr(test, assert_instr(vporq))]
29742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29743pub const fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29744    unsafe {
29745        let or = _mm256_or_epi64(a, b).as_i64x4();
29746        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
29747    }
29748}
29749
29750/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29751///
29752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
29753#[inline]
29754#[target_feature(enable = "avx512f,avx512vl")]
29755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29756#[cfg_attr(test, assert_instr(vporq))]
29757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29758pub const fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29759    unsafe {
29760        let or = _mm256_or_epi64(a, b).as_i64x4();
29761        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
29762    }
29763}
29764
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vporq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Bitwise OR is lane-size agnostic; the i64x2 view matches the intrinsic name.
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
}
29776
29777/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29778///
29779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
29780#[inline]
29781#[target_feature(enable = "avx512f,avx512vl")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783#[cfg_attr(test, assert_instr(vporq))]
29784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29785pub const fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29786    unsafe {
29787        let or = _mm_or_epi64(a, b).as_i64x2();
29788        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
29789    }
29790}
29791
29792/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29793///
29794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
29795#[inline]
29796#[target_feature(enable = "avx512f,avx512vl")]
29797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29798#[cfg_attr(test, assert_instr(vporq))]
29799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29800pub const fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29801    unsafe {
29802        let or = _mm_or_epi64(a, b).as_i64x2();
29803        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
29804    }
29805}
29806
29807/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
29808///
29809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
29810#[inline]
29811#[target_feature(enable = "avx512f")]
29812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29813#[cfg_attr(test, assert_instr(vporq))]
29814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29815pub const fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
29816    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
29817}
29818
29819/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29820///
29821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
29826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29827pub const fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
29828    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
29829}
29830
29831/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29832///
29833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
29834#[inline]
29835#[target_feature(enable = "avx512f")]
29836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29837#[cfg_attr(test, assert_instr(vpxord))]
29838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29839pub const fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29840    unsafe {
29841        let xor = _mm512_xor_epi32(a, b).as_i32x16();
29842        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
29843    }
29844}
29845
29846/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29847///
29848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
29849#[inline]
29850#[target_feature(enable = "avx512f")]
29851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29852#[cfg_attr(test, assert_instr(vpxord))]
29853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29854pub const fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29855    unsafe {
29856        let xor = _mm512_xor_epi32(a, b).as_i32x16();
29857        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
29858    }
29859}
29860
29861/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29862///
29863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
29864#[inline]
29865#[target_feature(enable = "avx512f,avx512vl")]
29866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29867#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29869pub const fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
29870    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
29871}
29872
29873/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29874///
29875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
29876#[inline]
29877#[target_feature(enable = "avx512f,avx512vl")]
29878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29879#[cfg_attr(test, assert_instr(vpxord))]
29880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29881pub const fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29882    unsafe {
29883        let xor = _mm256_xor_epi32(a, b).as_i32x8();
29884        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
29885    }
29886}
29887
29888/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
29891#[inline]
29892#[target_feature(enable = "avx512f,avx512vl")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpxord))]
29895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29896pub const fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29897    unsafe {
29898        let xor = _mm256_xor_epi32(a, b).as_i32x8();
29899        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
29900    }
29901}
29902
29903/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29904///
29905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
29906#[inline]
29907#[target_feature(enable = "avx512f,avx512vl")]
29908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29909#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29911pub const fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
29912    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
29913}
29914
29915/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29916///
29917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
29918#[inline]
29919#[target_feature(enable = "avx512f,avx512vl")]
29920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29921#[cfg_attr(test, assert_instr(vpxord))]
29922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29923pub const fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29924    unsafe {
29925        let xor = _mm_xor_epi32(a, b).as_i32x4();
29926        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
29927    }
29928}
29929
29930/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpxord))]
29937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29938pub const fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29939    unsafe {
29940        let xor = _mm_xor_epi32(a, b).as_i32x4();
29941        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
29942    }
29943}
29944
29945/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29946///
29947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
29948#[inline]
29949#[target_feature(enable = "avx512f")]
29950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29951#[cfg_attr(test, assert_instr(vpxorq))]
29952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29953pub const fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
29954    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
29955}
29956
29957/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29958///
29959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
29960#[inline]
29961#[target_feature(enable = "avx512f")]
29962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29963#[cfg_attr(test, assert_instr(vpxorq))]
29964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29965pub const fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29966    unsafe {
29967        let xor = _mm512_xor_epi64(a, b).as_i64x8();
29968        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
29969    }
29970}
29971
29972/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978#[cfg_attr(test, assert_instr(vpxorq))]
29979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29980pub const fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29981    unsafe {
29982        let xor = _mm512_xor_epi64(a, b).as_i64x8();
29983        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
29984    }
29985}
29986
29987/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29988///
29989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
29990#[inline]
29991#[target_feature(enable = "avx512f,avx512vl")]
29992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29993#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
29994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29995pub const fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
29996    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
29997}
29998
29999/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30000///
30001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
30002#[inline]
30003#[target_feature(enable = "avx512f,avx512vl")]
30004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30005#[cfg_attr(test, assert_instr(vpxorq))]
30006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30007pub const fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30008    unsafe {
30009        let xor = _mm256_xor_epi64(a, b).as_i64x4();
30010        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
30011    }
30012}
30013
30014/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30015///
30016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
30017#[inline]
30018#[target_feature(enable = "avx512f,avx512vl")]
30019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30020#[cfg_attr(test, assert_instr(vpxorq))]
30021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30022pub const fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30023    unsafe {
30024        let xor = _mm256_xor_epi64(a, b).as_i64x4();
30025        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
30026    }
30027}
30028
30029/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
30030///
30031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
30032#[inline]
30033#[target_feature(enable = "avx512f,avx512vl")]
30034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30035#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
30036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30037pub const fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
30038    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
30039}
30040
30041/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30042///
30043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
30044#[inline]
30045#[target_feature(enable = "avx512f,avx512vl")]
30046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30047#[cfg_attr(test, assert_instr(vpxorq))]
30048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30049pub const fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30050    unsafe {
30051        let xor = _mm_xor_epi64(a, b).as_i64x2();
30052        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
30053    }
30054}
30055
30056/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
30059#[inline]
30060#[target_feature(enable = "avx512f,avx512vl")]
30061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30062#[cfg_attr(test, assert_instr(vpxorq))]
30063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30064pub const fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30065    unsafe {
30066        let xor = _mm_xor_epi64(a, b).as_i64x2();
30067        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
30068    }
30069}
30070
30071/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
30072///
30073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
30074#[inline]
30075#[target_feature(enable = "avx512f")]
30076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30077#[cfg_attr(test, assert_instr(vpxorq))]
30078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30079pub const fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
30080    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
30081}
30082
30083/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
30084///
30085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
30086#[inline]
30087#[target_feature(enable = "avx512f")]
30088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30089#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
30090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30091pub const fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
30092    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
30093}
30094
30095/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30096///
30097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
30098#[inline]
30099#[target_feature(enable = "avx512f")]
30100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30101#[cfg_attr(test, assert_instr(vpandnd))]
30102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30103pub const fn _mm512_mask_andnot_epi32(
30104    src: __m512i,
30105    k: __mmask16,
30106    a: __m512i,
30107    b: __m512i,
30108) -> __m512i {
30109    unsafe {
30110        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
30111        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
30112    }
30113}
30114
30115/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30116///
30117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
30118#[inline]
30119#[target_feature(enable = "avx512f")]
30120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30121#[cfg_attr(test, assert_instr(vpandnd))]
30122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30123pub const fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
30124    unsafe {
30125        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
30126        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
30127    }
30128}
30129
30130/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
30133#[inline]
30134#[target_feature(enable = "avx512f,avx512vl")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vpandnd))]
30137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30138pub const fn _mm256_mask_andnot_epi32(
30139    src: __m256i,
30140    k: __mmask8,
30141    a: __m256i,
30142    b: __m256i,
30143) -> __m256i {
30144    unsafe {
30145        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
30146        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
30147        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
30148    }
30149}
30150
30151/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30152///
30153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
30154#[inline]
30155#[target_feature(enable = "avx512f,avx512vl")]
30156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30157#[cfg_attr(test, assert_instr(vpandnd))]
30158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30159pub const fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30160    unsafe {
30161        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
30162        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
30163        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
30164    }
30165}
30166
30167/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30168///
30169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
30170#[inline]
30171#[target_feature(enable = "avx512f,avx512vl")]
30172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30173#[cfg_attr(test, assert_instr(vpandnd))]
30174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30175pub const fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30176    unsafe {
30177        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
30178        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
30179        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
30180    }
30181}
30182
30183/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30184///
30185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
30186#[inline]
30187#[target_feature(enable = "avx512f,avx512vl")]
30188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30189#[cfg_attr(test, assert_instr(vpandnd))]
30190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30191pub const fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30192    unsafe {
30193        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
30194        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
30195        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
30196    }
30197}
30198
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    // (!a) & b, built as (a XOR all-ones) AND b.
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
30210
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        // Writemask: lanes whose mask bit is clear are taken from `src`.
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
    }
}
30230
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        // Zeromask: lanes whose mask bit is clear become zero.
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
    }
}
30245
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        // NOT is expressed as XOR with an all-ones vector.
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        // Writemask: lanes whose mask bit is clear are taken from `src`.
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
    }
}
30266
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // NOT is expressed as XOR with an all-ones vector.
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        // Zeromask: lanes whose mask bit is clear become zero.
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
    }
}
30282
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // NOT is expressed as XOR with an all-ones vector.
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        // Writemask: lanes whose mask bit is clear are taken from `src`.
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
    }
}
30298
/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // NOT is expressed as XOR with an all-ones vector.
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        // Zeromask: lanes whose mask bit is clear become zero.
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
    }
}
30314
/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    // Element width is irrelevant for a pure bitwise op; reuse the epi64 helpers.
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
30326
/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask16_u32(a: __mmask16) -> u32 {
    // `__mmask16` is `u16`; the cast zero-extends to 32 bits.
    a as u32
}
30337
/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask16(a: u32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `a` are kept.
    a as __mmask16
}
30348
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // `__mmask16` is a plain `u16`, so a scalar bitwise AND suffices.
    a & b
}
30360
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Alias of `_kand_mask16`; masks are plain `u16` values.
    a & b
}
30372
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // `__mmask16` is a plain `u16`, so a scalar bitwise OR suffices.
    a | b
}
30384
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Alias of `_kor_mask16`; masks are plain `u16` values.
    a | b
}
30396
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // `__mmask16` is a plain `u16`, so a scalar bitwise XOR suffices.
    a ^ b
}
30408
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Alias of `_kxor_mask16`; masks are plain `u16` values.
    a ^ b
}
30420
30421/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30428pub const fn _knot_mask16(a: __mmask16) -> __mmask16 {
30429    a ^ 0b11111111_11111111
30430}
30431
30432/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30439pub const fn _mm512_knot(a: __mmask16) -> __mmask16 {
30440    a ^ 0b11111111_11111111
30441}
30442
30443/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30449#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
30450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30451pub const fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
30452    _mm512_kand(_mm512_knot(a), b)
30453}
30454
30455/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
30456///
30457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
30458#[inline]
30459#[target_feature(enable = "avx512f")]
30460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30461#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandw
30462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30463pub const fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
30464    _mm512_kand(_mm512_knot(a), b)
30465}
30466
30467/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30473#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
30474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30475pub const fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
30476    _mm512_knot(_mm512_kxor(a, b))
30477}
30478
30479/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
30480///
30481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
30482#[inline]
30483#[target_feature(enable = "avx512f")]
30484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30485#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kandw
30486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30487pub const fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
30488    _mm512_knot(_mm512_kxor(a, b))
30489}
30490
30491/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30492/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
30493///
30494/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
30495#[inline]
30496#[target_feature(enable = "avx512f")]
30497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30499pub const unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
30500    let tmp = _kor_mask16(a, b);
30501    *all_ones = (tmp == 0xffff) as u8;
30502    (tmp == 0) as u8
30503}
30504
30505/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
30506/// store 0 in dst.
30507///
30508/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
30509#[inline]
30510#[target_feature(enable = "avx512f")]
30511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30513pub const fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30514    (_kor_mask16(a, b) == 0xffff) as u8
30515}
30516
30517/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30518/// store 0 in dst.
30519///
30520/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
30521#[inline]
30522#[target_feature(enable = "avx512f")]
30523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30525pub const fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30526    (_kor_mask16(a, b) == 0) as u8
30527}
30528
/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
    // `unbounded_shl` yields 0 when COUNT >= 16, matching the hardware's
    // shift-in-zeros behavior instead of panicking on overflow.
    a.unbounded_shl(COUNT)
}
30540
/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
    // `unbounded_shr` yields 0 when COUNT >= 16, matching the hardware's
    // shift-in-zeros behavior instead of panicking on overflow.
    a.unbounded_shr(COUNT)
}
30552
/// Load 16-bit mask from memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
    // Plain dereference; `mem_addr` must be valid for a 2-byte read
    // (caller's obligation for this unsafe fn).
    *mem_addr
}
30563
/// Store 16-bit mask to memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
    // Plain store; `mem_addr` must be valid for a 2-byte write
    // (caller's obligation for this unsafe fn).
    *mem_addr = a;
}
30574
/// Copy 16-bit mask a to k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kmov(a: __mmask16) -> __mmask16 {
    // Identity: masks are plain `u16` values, so a copy is just a move.
    a
}
30586
/// Converts integer mask into bitmask, storing the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
#[inline]
#[target_feature(enable = "avx512f")] // plain truncating cast; no kmovw needed
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_int2mask(mask: i32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `mask` are kept.
    mask as u16
}
30597
/// Converts bit mask k1 into an integer value, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask2int(k1: __mmask16) -> i32 {
    // `u16` -> `i32` zero-extends, so the result is always non-negative.
    k1 as i32
}
30609
30610/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
30611///
30612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
30613#[inline]
30614#[target_feature(enable = "avx512f")]
30615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30616#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw
30617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30618pub const fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
30619    ((a & 0xff) << 8) | (b & 0xff)
30620}
30621
30622/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
30623///
30624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
30625#[inline]
30626#[target_feature(enable = "avx512f")]
30627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30628#[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
30629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30630pub const fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
30631    let r = (a | b) == 0b11111111_11111111;
30632    r as i32
30633}
30634
30635/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
30636///
30637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
30638#[inline]
30639#[target_feature(enable = "avx512f")]
30640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30641#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
30642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30643pub const fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
30644    let r = (a | b) == 0;
30645    r as i32
30646}
30647
30648/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30649///
30650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
30651#[inline]
30652#[target_feature(enable = "avx512f")]
30653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30654#[cfg_attr(test, assert_instr(vptestmd))]
30655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30656pub const fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30657    let and = _mm512_and_epi32(a, b);
30658    let zero = _mm512_setzero_si512();
30659    _mm512_cmpneq_epi32_mask(and, zero)
30660}
30661
30662/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30663///
30664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
30665#[inline]
30666#[target_feature(enable = "avx512f")]
30667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30668#[cfg_attr(test, assert_instr(vptestmd))]
30669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30670pub const fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30671    let and = _mm512_and_epi32(a, b);
30672    let zero = _mm512_setzero_si512();
30673    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
30674}
30675
30676/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30677///
30678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
30679#[inline]
30680#[target_feature(enable = "avx512f,avx512vl")]
30681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30682#[cfg_attr(test, assert_instr(vptestmd))]
30683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30684pub const fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30685    let and = _mm256_and_si256(a, b);
30686    let zero = _mm256_setzero_si256();
30687    _mm256_cmpneq_epi32_mask(and, zero)
30688}
30689
30690/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30691///
30692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
30693#[inline]
30694#[target_feature(enable = "avx512f,avx512vl")]
30695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30696#[cfg_attr(test, assert_instr(vptestmd))]
30697#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30698pub const fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30699    let and = _mm256_and_si256(a, b);
30700    let zero = _mm256_setzero_si256();
30701    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
30702}
30703
30704/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30705///
30706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
30707#[inline]
30708#[target_feature(enable = "avx512f,avx512vl")]
30709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30710#[cfg_attr(test, assert_instr(vptestmd))]
30711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30712pub const fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30713    let and = _mm_and_si128(a, b);
30714    let zero = _mm_setzero_si128();
30715    _mm_cmpneq_epi32_mask(and, zero)
30716}
30717
30718/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30719///
30720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
30721#[inline]
30722#[target_feature(enable = "avx512f,avx512vl")]
30723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30724#[cfg_attr(test, assert_instr(vptestmd))]
30725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30726pub const fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30727    let and = _mm_and_si128(a, b);
30728    let zero = _mm_setzero_si128();
30729    _mm_mask_cmpneq_epi32_mask(k, and, zero)
30730}
30731
30732/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30733///
30734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
30735#[inline]
30736#[target_feature(enable = "avx512f")]
30737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30738#[cfg_attr(test, assert_instr(vptestmq))]
30739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30740pub const fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30741    let and = _mm512_and_epi64(a, b);
30742    let zero = _mm512_setzero_si512();
30743    _mm512_cmpneq_epi64_mask(and, zero)
30744}
30745
30746/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30747///
30748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
30749#[inline]
30750#[target_feature(enable = "avx512f")]
30751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30752#[cfg_attr(test, assert_instr(vptestmq))]
30753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30754pub const fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30755    let and = _mm512_and_epi64(a, b);
30756    let zero = _mm512_setzero_si512();
30757    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
30758}
30759
30760/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30761///
30762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
30763#[inline]
30764#[target_feature(enable = "avx512f,avx512vl")]
30765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30766#[cfg_attr(test, assert_instr(vptestmq))]
30767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30768pub const fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30769    let and = _mm256_and_si256(a, b);
30770    let zero = _mm256_setzero_si256();
30771    _mm256_cmpneq_epi64_mask(and, zero)
30772}
30773
30774/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30775///
30776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
30777#[inline]
30778#[target_feature(enable = "avx512f,avx512vl")]
30779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30780#[cfg_attr(test, assert_instr(vptestmq))]
30781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30782pub const fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30783    let and = _mm256_and_si256(a, b);
30784    let zero = _mm256_setzero_si256();
30785    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
30786}
30787
30788/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30789///
30790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
30791#[inline]
30792#[target_feature(enable = "avx512f,avx512vl")]
30793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30794#[cfg_attr(test, assert_instr(vptestmq))]
30795#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30796pub const fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30797    let and = _mm_and_si128(a, b);
30798    let zero = _mm_setzero_si128();
30799    _mm_cmpneq_epi64_mask(and, zero)
30800}
30801
30802/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30803///
30804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
30805#[inline]
30806#[target_feature(enable = "avx512f,avx512vl")]
30807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30808#[cfg_attr(test, assert_instr(vptestmq))]
30809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30810pub const fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30811    let and = _mm_and_si128(a, b);
30812    let zero = _mm_setzero_si128();
30813    _mm_mask_cmpneq_epi64_mask(k, and, zero)
30814}
30815
30816/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30817///
30818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
30819#[inline]
30820#[target_feature(enable = "avx512f")]
30821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30822#[cfg_attr(test, assert_instr(vptestnmd))]
30823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30824pub const fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30825    let and = _mm512_and_epi32(a, b);
30826    let zero = _mm512_setzero_si512();
30827    _mm512_cmpeq_epi32_mask(and, zero)
30828}
30829
30830/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[cfg_attr(test, assert_instr(vptestnmd))]
30837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30838pub const fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30839    let and = _mm512_and_epi32(a, b);
30840    let zero = _mm512_setzero_si512();
30841    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
30842}
30843
30844/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
30847#[inline]
30848#[target_feature(enable = "avx512f,avx512vl")]
30849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30850#[cfg_attr(test, assert_instr(vptestnmd))]
30851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30852pub const fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30853    let and = _mm256_and_si256(a, b);
30854    let zero = _mm256_setzero_si256();
30855    _mm256_cmpeq_epi32_mask(and, zero)
30856}
30857
30858/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30859///
30860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
30861#[inline]
30862#[target_feature(enable = "avx512f,avx512vl")]
30863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30864#[cfg_attr(test, assert_instr(vptestnmd))]
30865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30866pub const fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30867    let and = _mm256_and_si256(a, b);
30868    let zero = _mm256_setzero_si256();
30869    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
30870}
30871
30872/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30873///
30874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
30875#[inline]
30876#[target_feature(enable = "avx512f,avx512vl")]
30877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30878#[cfg_attr(test, assert_instr(vptestnmd))]
30879#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30880pub const fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30881    let and = _mm_and_si128(a, b);
30882    let zero = _mm_setzero_si128();
30883    _mm_cmpeq_epi32_mask(and, zero)
30884}
30885
30886/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30887///
30888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
30889#[inline]
30890#[target_feature(enable = "avx512f,avx512vl")]
30891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30892#[cfg_attr(test, assert_instr(vptestnmd))]
30893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30894pub const fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30895    let and = _mm_and_si128(a, b);
30896    let zero = _mm_setzero_si128();
30897    _mm_mask_cmpeq_epi32_mask(k, and, zero)
30898}
30899
30900/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30901///
30902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
30903#[inline]
30904#[target_feature(enable = "avx512f")]
30905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30906#[cfg_attr(test, assert_instr(vptestnmq))]
30907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30908pub const fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30909    let and = _mm512_and_epi64(a, b);
30910    let zero = _mm512_setzero_si512();
30911    _mm512_cmpeq_epi64_mask(and, zero)
30912}
30913
30914/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30915///
30916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
30917#[inline]
30918#[target_feature(enable = "avx512f")]
30919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30920#[cfg_attr(test, assert_instr(vptestnmq))]
30921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30922pub const fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30923    let and = _mm512_and_epi64(a, b);
30924    let zero = _mm512_setzero_si512();
30925    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
30926}
30927
30928/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30934#[cfg_attr(test, assert_instr(vptestnmq))]
30935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30936pub const fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30937    let and = _mm256_and_si256(a, b);
30938    let zero = _mm256_setzero_si256();
30939    _mm256_cmpeq_epi64_mask(and, zero)
30940}
30941
30942/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30943///
30944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
30945#[inline]
30946#[target_feature(enable = "avx512f,avx512vl")]
30947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30948#[cfg_attr(test, assert_instr(vptestnmq))]
30949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30950pub const fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30951    let and = _mm256_and_si256(a, b);
30952    let zero = _mm256_setzero_si256();
30953    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
30954}
30955
30956/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30957///
30958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
30959#[inline]
30960#[target_feature(enable = "avx512f,avx512vl")]
30961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30962#[cfg_attr(test, assert_instr(vptestnmq))]
30963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30964pub const fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30965    let and = _mm_and_si128(a, b);
30966    let zero = _mm_setzero_si128();
30967    _mm_cmpeq_epi64_mask(and, zero)
30968}
30969
30970/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
30973#[inline]
30974#[target_feature(enable = "avx512f,avx512vl")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vptestnmq))]
30977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30978pub const fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30979    let and = _mm_and_si128(a, b);
30980    let zero = _mm_setzero_si128();
30981    _mm_mask_cmpeq_epi64_mask(k, and, zero)
30982}
30983
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // `vps!` is a crate-local macro (not in view here) that presumably wraps the mnemonic
    // with a `{p}`-based memory operand — TODO confirm against its definition.
    // `preserves_flags`: vmovntps does not touch EFLAGS; `nostack`: no push/pop is emitted.
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}
31010
/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Same asm pattern as `_mm512_stream_ps`, with the double-precision store mnemonic.
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}
31037
/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Same asm pattern as `_mm512_stream_ps`, with the integer store mnemonic.
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}
31064
/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
    let dst: __m512i;
    // `pure, readonly`: the asm's only effect is reading the 64 bytes at `mem_addr`, so the
    // compiler may CSE/eliminate duplicate calls. `vpl!` is a crate-local macro (not in view)
    // that presumably supplies the `{p}`-based memory source operand — TODO confirm.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(zmm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
31083
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values. The first argument is placed in the highest lane (Intel `set` convention).
///
/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    // Delegate to the reversed-order setter: the last parameter (`e15` here) becomes lane 0.
    _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    )
}
31113
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values in reverse order: `e0` is placed in the lowest lane.
///
/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    unsafe {
        // Build the vector lane-by-lane in memory order, then reinterpret as `__m512`.
        let r = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        );
        transmute(r)
    }
}
31147
31148/// Broadcast 64-bit float `a` to all elements of `dst`.
31149///
31150/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
31151#[inline]
31152#[target_feature(enable = "avx512f")]
31153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31155pub const fn _mm512_set1_pd(a: f64) -> __m512d {
31156    unsafe { transmute(f64x8::splat(a)) }
31157}
31158
31159/// Broadcast 32-bit float `a` to all elements of `dst`.
31160///
31161/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
31162#[inline]
31163#[target_feature(enable = "avx512f")]
31164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31166pub const fn _mm512_set1_ps(a: f32) -> __m512 {
31167    unsafe { transmute(f32x16::splat(a)) }
31168}
31169
/// Sets packed 32-bit integers in `dst` with the supplied values. The first argument (`e15`)
/// is placed in the highest lane (Intel `set` convention).
///
/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi32(
    e15: i32,
    e14: i32,
    e13: i32,
    e12: i32,
    e11: i32,
    e10: i32,
    e9: i32,
    e8: i32,
    e7: i32,
    e6: i32,
    e5: i32,
    e4: i32,
    e3: i32,
    e2: i32,
    e1: i32,
    e0: i32,
) -> __m512i {
    // Delegate to the reversed-order setter: `e0` becomes lane 0, `e15` becomes lane 15.
    _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
31199
31200/// Broadcast 8-bit integer a to all elements of dst.
31201///
31202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
31203#[inline]
31204#[target_feature(enable = "avx512f")]
31205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31207pub const fn _mm512_set1_epi8(a: i8) -> __m512i {
31208    unsafe { transmute(i8x64::splat(a)) }
31209}
31210
31211/// Broadcast the low packed 16-bit integer from a to all elements of dst.
31212///
31213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
31214#[inline]
31215#[target_feature(enable = "avx512f")]
31216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31217#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31218pub const fn _mm512_set1_epi16(a: i16) -> __m512i {
31219    unsafe { transmute(i16x32::splat(a)) }
31220}
31221
31222/// Broadcast 32-bit integer `a` to all elements of `dst`.
31223///
31224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
31225#[inline]
31226#[target_feature(enable = "avx512f")]
31227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31229pub const fn _mm512_set1_epi32(a: i32) -> __m512i {
31230    unsafe { transmute(i32x16::splat(a)) }
31231}
31232
31233/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31234///
31235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
31236#[inline]
31237#[target_feature(enable = "avx512f")]
31238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31239#[cfg_attr(test, assert_instr(vpbroadcastd))]
31240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31241pub const fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
31242    unsafe {
31243        let r = _mm512_set1_epi32(a).as_i32x16();
31244        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
31245    }
31246}
31247
31248/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31249///
31250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
31251#[inline]
31252#[target_feature(enable = "avx512f")]
31253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31254#[cfg_attr(test, assert_instr(vpbroadcastd))]
31255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31256pub const fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
31257    unsafe {
31258        let r = _mm512_set1_epi32(a).as_i32x16();
31259        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
31260    }
31261}
31262
31263/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31264///
31265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
31266#[inline]
31267#[target_feature(enable = "avx512f,avx512vl")]
31268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31269#[cfg_attr(test, assert_instr(vpbroadcastd))]
31270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31271pub const fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
31272    unsafe {
31273        let r = _mm256_set1_epi32(a).as_i32x8();
31274        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
31275    }
31276}
31277
31278/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpbroadcastd))]
31285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31286pub const fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
31287    unsafe {
31288        let r = _mm256_set1_epi32(a).as_i32x8();
31289        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
31290    }
31291}
31292
31293/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31294///
31295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
31296#[inline]
31297#[target_feature(enable = "avx512f,avx512vl")]
31298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31299#[cfg_attr(test, assert_instr(vpbroadcastd))]
31300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31301pub const fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
31302    unsafe {
31303        let r = _mm_set1_epi32(a).as_i32x4();
31304        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
31305    }
31306}
31307
31308/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31309///
31310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
31311#[inline]
31312#[target_feature(enable = "avx512f,avx512vl")]
31313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31314#[cfg_attr(test, assert_instr(vpbroadcastd))]
31315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31316pub const fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
31317    unsafe {
31318        let r = _mm_set1_epi32(a).as_i32x4();
31319        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
31320    }
31321}
31322
31323/// Broadcast 64-bit integer `a` to all elements of `dst`.
31324///
31325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
31326#[inline]
31327#[target_feature(enable = "avx512f")]
31328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31330pub const fn _mm512_set1_epi64(a: i64) -> __m512i {
31331    unsafe { transmute(i64x8::splat(a)) }
31332}
31333
31334/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31335///
31336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
31337#[inline]
31338#[target_feature(enable = "avx512f")]
31339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31340#[cfg_attr(test, assert_instr(vpbroadcastq))]
31341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31342pub const fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
31343    unsafe {
31344        let r = _mm512_set1_epi64(a).as_i64x8();
31345        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
31346    }
31347}
31348
31349/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31350///
31351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
31352#[inline]
31353#[target_feature(enable = "avx512f")]
31354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31355#[cfg_attr(test, assert_instr(vpbroadcastq))]
31356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31357pub const fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
31358    unsafe {
31359        let r = _mm512_set1_epi64(a).as_i64x8();
31360        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
31361    }
31362}
31363
31364/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31365///
31366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
31367#[inline]
31368#[target_feature(enable = "avx512f,avx512vl")]
31369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31370#[cfg_attr(test, assert_instr(vpbroadcastq))]
31371#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31372pub const fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
31373    unsafe {
31374        let r = _mm256_set1_epi64x(a).as_i64x4();
31375        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
31376    }
31377}
31378
31379/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31380///
31381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
31382#[inline]
31383#[target_feature(enable = "avx512f,avx512vl")]
31384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31385#[cfg_attr(test, assert_instr(vpbroadcastq))]
31386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31387pub const fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
31388    unsafe {
31389        let r = _mm256_set1_epi64x(a).as_i64x4();
31390        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
31391    }
31392}
31393
31394/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31395///
31396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
31397#[inline]
31398#[target_feature(enable = "avx512f,avx512vl")]
31399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31400#[cfg_attr(test, assert_instr(vpbroadcastq))]
31401#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31402pub const fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
31403    unsafe {
31404        let r = _mm_set1_epi64x(a).as_i64x2();
31405        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
31406    }
31407}
31408
31409/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31410///
31411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
31412#[inline]
31413#[target_feature(enable = "avx512f,avx512vl")]
31414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31415#[cfg_attr(test, assert_instr(vpbroadcastq))]
31416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31417pub const fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
31418    unsafe {
31419        let r = _mm_set1_epi64x(a).as_i64x2();
31420        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
31421    }
31422}
31423
/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    // The 4-element sequence (d, c, b, a) is written twice to fill all 8 lanes.
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
}
31434
/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    // Same repetition as `_mm512_set4_epi64` but with the four-element
    // sequence laid out in reverse order, hence the flipped argument list.
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
}
31445
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_LT_OS: ordered, signaling — lanes with a NaN operand compare false.
    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}
31456
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_LT_OS: ordered, signaling — NaN lanes compare false.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
}
31467
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_NLT_US: unordered, signaling — lanes with a NaN operand compare true.
    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
}
31478
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_NLT_US: unordered, signaling — NaN lanes compare true.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
}
31489
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_LE_OS: ordered, signaling — lanes with a NaN operand compare false.
    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
}
31500
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_LE_OS: ordered, signaling — NaN lanes compare false.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
}
31511
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_NLE_US: unordered, signaling — lanes with a NaN operand compare true.
    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
}
31522
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_NLE_US: unordered, signaling — NaN lanes compare true.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
}
31533
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_EQ_OQ: ordered, quiet — NaN lanes compare false, without signaling
    // on quiet NaNs.
    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
}
31544
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_EQ_OQ: ordered, quiet — NaN lanes compare false.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
}
31555
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_NEQ_UQ: unordered, quiet — lanes with a NaN operand compare true.
    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
}
31566
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_NEQ_UQ: unordered, quiet — NaN lanes compare true.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
}
31577
31578/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31579///
31580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
31581#[inline]
31582#[target_feature(enable = "avx512f")]
31583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31584#[rustc_legacy_const_generics(2)]
31585#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31586pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
31587    unsafe {
31588        static_assert_uimm_bits!(IMM8, 5);
31589        let neg_one = -1;
31590        let a = a.as_f32x16();
31591        let b = b.as_f32x16();
31592        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
31593        r.cast_unsigned()
31594    }
31595}
31596
31597/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31598///
31599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
31600#[inline]
31601#[target_feature(enable = "avx512f")]
31602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31603#[rustc_legacy_const_generics(3)]
31604#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31605pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
31606    unsafe {
31607        static_assert_uimm_bits!(IMM8, 5);
31608        let a = a.as_f32x16();
31609        let b = b.as_f32x16();
31610        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
31611        r.cast_unsigned()
31612    }
31613}
31614
31615/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31616///
31617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
31618#[inline]
31619#[target_feature(enable = "avx512f,avx512vl")]
31620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31621#[rustc_legacy_const_generics(2)]
31622#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31623pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
31624    unsafe {
31625        static_assert_uimm_bits!(IMM8, 5);
31626        let neg_one = -1;
31627        let a = a.as_f32x8();
31628        let b = b.as_f32x8();
31629        let r = vcmpps256(a, b, IMM8, neg_one);
31630        r.cast_unsigned()
31631    }
31632}
31633
31634/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31635///
31636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
31637#[inline]
31638#[target_feature(enable = "avx512f,avx512vl")]
31639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31640#[rustc_legacy_const_generics(3)]
31641#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31642pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
31643    unsafe {
31644        static_assert_uimm_bits!(IMM8, 5);
31645        let a = a.as_f32x8();
31646        let b = b.as_f32x8();
31647        let r = vcmpps256(a, b, IMM8, k1 as i8);
31648        r.cast_unsigned()
31649    }
31650}
31651
31652/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31653///
31654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
31655#[inline]
31656#[target_feature(enable = "avx512f,avx512vl")]
31657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31658#[rustc_legacy_const_generics(2)]
31659#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31660pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
31661    unsafe {
31662        static_assert_uimm_bits!(IMM8, 5);
31663        let neg_one = -1;
31664        let a = a.as_f32x4();
31665        let b = b.as_f32x4();
31666        let r = vcmpps128(a, b, IMM8, neg_one);
31667        r.cast_unsigned()
31668    }
31669}
31670
31671/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31672///
31673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
31674#[inline]
31675#[target_feature(enable = "avx512f,avx512vl")]
31676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31677#[rustc_legacy_const_generics(3)]
31678#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31679pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
31680    unsafe {
31681        static_assert_uimm_bits!(IMM8, 5);
31682        let a = a.as_f32x4();
31683        let b = b.as_f32x4();
31684        let r = vcmpps128(a, b, IMM8, k1 as i8);
31685        r.cast_unsigned()
31686    }
31687}
31688
31689/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
31690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31691///
31692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
31693#[inline]
31694#[target_feature(enable = "avx512f")]
31695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31696#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31697#[rustc_legacy_const_generics(2, 3)]
31698pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31699    a: __m512,
31700    b: __m512,
31701) -> __mmask16 {
31702    unsafe {
31703        static_assert_uimm_bits!(IMM5, 5);
31704        static_assert_mantissas_sae!(SAE);
31705        let neg_one = -1;
31706        let a = a.as_f32x16();
31707        let b = b.as_f32x16();
31708        let r = vcmpps(a, b, IMM5, neg_one, SAE);
31709        r.cast_unsigned()
31710    }
31711}
31712
31713/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
31714/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31715///
31716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
31717#[inline]
31718#[target_feature(enable = "avx512f")]
31719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31720#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31721#[rustc_legacy_const_generics(3, 4)]
31722pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31723    m: __mmask16,
31724    a: __m512,
31725    b: __m512,
31726) -> __mmask16 {
31727    unsafe {
31728        static_assert_uimm_bits!(IMM5, 5);
31729        static_assert_mantissas_sae!(SAE);
31730        let a = a.as_f32x16();
31731        let b = b.as_f32x16();
31732        let r = vcmpps(a, b, IMM5, m as i16, SAE);
31733        r.cast_unsigned()
31734    }
31735}
31736
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_ORD_Q: quiet "ordered" test — a lane is set only when neither
    // operand is NaN.
    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
}
31747
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_ORD_Q: set when neither operand is NaN.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
}
31758
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_UNORD_Q: quiet "unordered" test — a lane is set when either
    // operand is NaN.
    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
}
31769
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    // _CMP_UNORD_Q: set when either operand is NaN.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
}
31780
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_LT_OS: ordered, signaling — lanes with a NaN operand compare false.
    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
}
31791
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_LT_OS: ordered, signaling — NaN lanes compare false.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
}
31802
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_NLT_US: unordered, signaling — lanes with a NaN operand compare true.
    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
}
31813
31814/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31815///
31816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
31817#[inline]
31818#[target_feature(enable = "avx512f")]
31819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31820#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31821pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31822    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
31823}
31824
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_LE_OS: ordered, signaling — lanes with a NaN operand compare false.
    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
}
31835
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_LE_OS: ordered, signaling — NaN lanes compare false.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
}
31846
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_NLE_US: unordered, signaling — lanes with a NaN operand compare true.
    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
}
31857
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_NLE_US: unordered, signaling — NaN lanes compare true.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
}
31868
/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_EQ_OQ: ordered, quiet — NaN lanes compare false, without signaling
    // on quiet NaNs.
    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
}
31879
/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_EQ_OQ: ordered, quiet — NaN lanes compare false.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
}
31890
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_NEQ_UQ: unordered, quiet — lanes with a NaN operand compare true.
    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
}
31901
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // _CMP_NEQ_UQ: unordered, quiet — NaN lanes compare true.
    // Lanes not selected by `k1` come out zero in the result mask.
    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
}
31912
31913/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31914///
31915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
31916#[inline]
31917#[target_feature(enable = "avx512f")]
31918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31919#[rustc_legacy_const_generics(2)]
31920#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31921pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
31922    unsafe {
31923        static_assert_uimm_bits!(IMM8, 5);
31924        let neg_one = -1;
31925        let a = a.as_f64x8();
31926        let b = b.as_f64x8();
31927        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
31928        r.cast_unsigned()
31929    }
31930}
31931
31932/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31933///
31934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
31935#[inline]
31936#[target_feature(enable = "avx512f")]
31937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31938#[rustc_legacy_const_generics(3)]
31939#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31940pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31941    unsafe {
31942        static_assert_uimm_bits!(IMM8, 5);
31943        let a = a.as_f64x8();
31944        let b = b.as_f64x8();
31945        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
31946        r.cast_unsigned()
31947    }
31948}
31949
31950/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31951///
31952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
31953#[inline]
31954#[target_feature(enable = "avx512f,avx512vl")]
31955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31956#[rustc_legacy_const_generics(2)]
31957#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31958pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
31959    unsafe {
31960        static_assert_uimm_bits!(IMM8, 5);
31961        let neg_one = -1;
31962        let a = a.as_f64x4();
31963        let b = b.as_f64x4();
31964        let r = vcmppd256(a, b, IMM8, neg_one);
31965        r.cast_unsigned()
31966    }
31967}
31968
31969/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31970///
31971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
31972#[inline]
31973#[target_feature(enable = "avx512f,avx512vl")]
31974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31975#[rustc_legacy_const_generics(3)]
31976#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31977pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
31978    unsafe {
31979        static_assert_uimm_bits!(IMM8, 5);
31980        let a = a.as_f64x4();
31981        let b = b.as_f64x4();
31982        let r = vcmppd256(a, b, IMM8, k1 as i8);
31983        r.cast_unsigned()
31984    }
31985}
31986
31987/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31988///
31989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
31990#[inline]
31991#[target_feature(enable = "avx512f,avx512vl")]
31992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31993#[rustc_legacy_const_generics(2)]
31994#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31995pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
31996    unsafe {
31997        static_assert_uimm_bits!(IMM8, 5);
31998        let neg_one = -1;
31999        let a = a.as_f64x2();
32000        let b = b.as_f64x2();
32001        let r = vcmppd128(a, b, IMM8, neg_one);
32002        r.cast_unsigned()
32003    }
32004}
32005
32006/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32007///
32008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
32009#[inline]
32010#[target_feature(enable = "avx512f,avx512vl")]
32011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32012#[rustc_legacy_const_generics(3)]
32013#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32014pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32015    unsafe {
32016        static_assert_uimm_bits!(IMM8, 5);
32017        let a = a.as_f64x2();
32018        let b = b.as_f64x2();
32019        let r = vcmppd128(a, b, IMM8, k1 as i8);
32020        r.cast_unsigned()
32021    }
32022}
32023
32024/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
32025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32026///
32027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
32028#[inline]
32029#[target_feature(enable = "avx512f")]
32030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32031#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32032#[rustc_legacy_const_generics(2, 3)]
32033pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32034    a: __m512d,
32035    b: __m512d,
32036) -> __mmask8 {
32037    unsafe {
32038        static_assert_uimm_bits!(IMM5, 5);
32039        static_assert_mantissas_sae!(SAE);
32040        let neg_one = -1;
32041        let a = a.as_f64x8();
32042        let b = b.as_f64x8();
32043        let r = vcmppd(a, b, IMM5, neg_one, SAE);
32044        r.cast_unsigned()
32045    }
32046}
32047
32048/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
32049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32050///
32051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
32052#[inline]
32053#[target_feature(enable = "avx512f")]
32054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32055#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32056#[rustc_legacy_const_generics(3, 4)]
32057pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32058    k1: __mmask8,
32059    a: __m512d,
32060    b: __m512d,
32061) -> __mmask8 {
32062    unsafe {
32063        static_assert_uimm_bits!(IMM5, 5);
32064        static_assert_mantissas_sae!(SAE);
32065        let a = a.as_f64x8();
32066        let b = b.as_f64x8();
32067        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
32068        r.cast_unsigned()
32069    }
32070}
32071
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // Delegate to the generic comparison with the quiet "ordered" predicate.
    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
}
32082
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // Delegate to the masked generic comparison with the quiet "ordered" predicate.
    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
}
32093
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    // Delegate to the generic comparison with the quiet "unordered" predicate.
    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
}
32104
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // Delegate to the masked generic comparison with the quiet "unordered" predicate.
    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
}
32115
32116/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[rustc_legacy_const_generics(2)]
32123#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32124pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
32125    unsafe {
32126        static_assert_uimm_bits!(IMM8, 5);
32127        let neg_one = -1;
32128        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
32129        r.cast_unsigned()
32130    }
32131}
32132
32133/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32134///
32135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
32136#[inline]
32137#[target_feature(enable = "avx512f")]
32138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32139#[rustc_legacy_const_generics(3)]
32140#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32141pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
32142    unsafe {
32143        static_assert_uimm_bits!(IMM8, 5);
32144        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
32145        r.cast_unsigned()
32146    }
32147}
32148
32149/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32151///
32152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
32153#[inline]
32154#[target_feature(enable = "avx512f")]
32155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32156#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32157#[rustc_legacy_const_generics(2, 3)]
32158pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
32159    unsafe {
32160        static_assert_uimm_bits!(IMM5, 5);
32161        static_assert_mantissas_sae!(SAE);
32162        let neg_one = -1;
32163        let r = vcmpss(a, b, IMM5, neg_one, SAE);
32164        r.cast_unsigned()
32165    }
32166}
32167
32168/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not seti).\
32169/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32170///
32171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
32172#[inline]
32173#[target_feature(enable = "avx512f")]
32174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32175#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32176#[rustc_legacy_const_generics(3, 4)]
32177pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
32178    k1: __mmask8,
32179    a: __m128,
32180    b: __m128,
32181) -> __mmask8 {
32182    unsafe {
32183        static_assert_uimm_bits!(IMM5, 5);
32184        static_assert_mantissas_sae!(SAE);
32185        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
32186        r.cast_unsigned()
32187    }
32188}
32189
32190/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32191///
32192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
32193#[inline]
32194#[target_feature(enable = "avx512f")]
32195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32196#[rustc_legacy_const_generics(2)]
32197#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32198pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32199    unsafe {
32200        static_assert_uimm_bits!(IMM8, 5);
32201        let neg_one = -1;
32202        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
32203        r.cast_unsigned()
32204    }
32205}
32206
32207/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32208///
32209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
32210#[inline]
32211#[target_feature(enable = "avx512f")]
32212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32213#[rustc_legacy_const_generics(3)]
32214#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32215pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32216    unsafe {
32217        static_assert_uimm_bits!(IMM8, 5);
32218        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
32219        r.cast_unsigned()
32220    }
32221}
32222
32223/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32225///
32226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
32227#[inline]
32228#[target_feature(enable = "avx512f")]
32229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32230#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32231#[rustc_legacy_const_generics(2, 3)]
32232pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32233    unsafe {
32234        static_assert_uimm_bits!(IMM5, 5);
32235        static_assert_mantissas_sae!(SAE);
32236        let neg_one = -1;
32237        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
32238        r.cast_unsigned()
32239    }
32240}
32241
32242/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
32243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32244///
32245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
32246#[inline]
32247#[target_feature(enable = "avx512f")]
32248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32249#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32250#[rustc_legacy_const_generics(3, 4)]
32251pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
32252    k1: __mmask8,
32253    a: __m128d,
32254    b: __m128d,
32255) -> __mmask8 {
32256    unsafe {
32257        static_assert_uimm_bits!(IMM5, 5);
32258        static_assert_mantissas_sae!(SAE);
32259        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
32260        r.cast_unsigned()
32261    }
32262}
32263
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise unsigned `<`; `simd_bitmask` packs the 16 per-lane results into
    // a bitmask, one bit per lane.
    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
}
32275
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegate to the generic unsigned comparison with the LT predicate.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32287
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `<` over 8 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
}
32299
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic unsigned comparison with the LT predicate.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32311
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `<` over 4 lanes; upper 4 mask bits stay zero.
    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
}
32323
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic unsigned comparison with the LT predicate.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32335
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise unsigned `>` over 16 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
}
32347
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // NLE (not-less-or-equal) is the predicate encoding of greater-than.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32359
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `>` over 8 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
}
32371
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // NLE (not-less-or-equal) is the predicate encoding of greater-than.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32383
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `>` over 4 lanes; upper 4 mask bits stay zero.
    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
}
32395
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLE (not-less-or-equal) is the predicate encoding of greater-than.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32407
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise unsigned `<=` over 16 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
}
32419
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegate to the generic unsigned comparison with the LE predicate.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
32431
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `<=` over 8 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
}
32443
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic unsigned comparison with the LE predicate.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
32455
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `<=` over 4 lanes; upper 4 mask bits stay zero.
    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
}
32467
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic unsigned comparison with the LE predicate.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
32479
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise unsigned `>=` over 16 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
}
32491
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // NLT (not-less-than) is the predicate encoding of greater-or-equal.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
32503
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `>=` over 8 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
}
32515
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // NLT (not-less-than) is the predicate encoding of greater-or-equal.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
32527
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `>=` over 4 lanes; upper 4 mask bits stay zero.
    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
}
32539
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // NLT (not-less-than) is the predicate encoding of greater-or-equal.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
32551
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise `==` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
}
32563
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Equality is the EQ predicate; lanes whose `k1` bit is unset come back
    // as 0 in the result mask.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
32575
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise `==` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
}
32587
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Equality is the EQ predicate; lanes whose `k1` bit is unset come back
    // as 0 in the result mask.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
32599
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `==` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
}
32611
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Equality is the EQ predicate; lanes whose `k1` bit is unset come back
    // as 0 in the result mask.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
32623
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise `!=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
}
32635
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Inequality is the NE predicate; lanes whose `k1` bit is unset come back
    // as 0 in the result mask.
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
32647
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise `!=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
}
32659
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Inequality is the NE predicate; lanes whose `k1` bit is unset come back
    // as 0 in the result mask.
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
32671
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `!=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
}
32683
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Inequality is the NE predicate; lanes whose `k1` bit is unset come back
    // as 0 in the result mask.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
32695
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        // The predicate must fit in 3 bits (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        // Dispatch on the predicate; arm numbering follows the
        // `_MM_CMPINT_*` constants used by the named wrappers in this file.
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x16::ZERO,  // _MM_CMPINT_FALSE: result is always 0
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (i.e. >=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (i.e. >)
            _ => i32x16::splat(-1), // 7 = _MM_CMPINT_TRUE: all bits set
        };
        // Pack one bit per lane into the integer mask.
        simd_bitmask(r)
    }
}
32726
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        // The predicate must fit in 3 bits (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        // Expand the bitmask `k1` to a lane-wide all-ones/all-zeros vector so
        // it can be AND-ed with the comparison result below.
        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        // Arm numbering follows the `_MM_CMPINT_*` constants.
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x16::ZERO,                // _MM_CMPINT_FALSE: always 0
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1, // 7 = _MM_CMPINT_TRUE: result is just the mask
        };
        // Pack one bit per lane into the integer mask.
        simd_bitmask(r)
    }
}
32759
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        // Dispatch on the predicate; arm numbering follows the
        // `_MM_CMPINT_*` constants used by the named wrappers in this file.
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x8::ZERO,   // _MM_CMPINT_FALSE: result is always 0
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (i.e. >=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (i.e. >)
            _ => i32x8::splat(-1), // 7 = _MM_CMPINT_TRUE: all bits set
        };
        // Pack one bit per lane into the integer mask.
        simd_bitmask(r)
    }
}
32790
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        // Expand the bitmask `k1` to a lane-wide all-ones/all-zeros vector so
        // it can be AND-ed with the comparison result below.
        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        // Arm numbering follows the `_MM_CMPINT_*` constants.
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x8::ZERO,                 // _MM_CMPINT_FALSE: always 0
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1, // 7 = _MM_CMPINT_TRUE: result is just the mask
        };
        // Pack one bit per lane into the integer mask.
        simd_bitmask(r)
    }
}
32823
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x4();
        let b = b.as_u32x4();
        // Dispatch on the predicate; arm numbering follows the
        // `_MM_CMPINT_*` constants used by the named wrappers in this file.
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x4::ZERO,   // _MM_CMPINT_FALSE: result is always 0
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (i.e. >=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (i.e. >)
            _ => i32x4::splat(-1), // 7 = _MM_CMPINT_TRUE: all bits set
        };
        // Pack one bit per lane into the integer mask.
        simd_bitmask(r)
    }
}
32851
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x4();
        let b = b.as_u32x4();
        // Expand the bitmask `k1` to a lane-wide all-ones/all-zeros vector so
        // it can be AND-ed with the comparison result below.
        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        // Arm numbering follows the `_MM_CMPINT_*` constants.
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x4::ZERO,                 // _MM_CMPINT_FALSE: always 0
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1, // 7 = _MM_CMPINT_TRUE: result is just the mask
        };
        // Pack one bit per lane into the integer mask.
        simd_bitmask(r)
    }
}
32884
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise signed `<` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
}
32896
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // `<` is the LT predicate; lanes whose `k1` bit is unset come back as 0
    // in the result mask.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32908
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `<` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
}
32920
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // `<` is the LT predicate; lanes whose `k1` bit is unset come back as 0
    // in the result mask.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32932
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `<` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}
32944
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `<` is the LT predicate; lanes whose `k1` bit is unset come back as 0
    // in the result mask.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
32956
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise signed `>` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
}
32968
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // `>` is the NLE ("not less or equal") predicate; lanes whose `k1` bit is
    // unset come back as 0 in the result mask.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
32980
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `>` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}
32992
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // `>` is the NLE ("not less or equal") predicate; lanes whose `k1` bit is
    // unset come back as 0 in the result mask.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33004
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `>` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}
33016
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `>` is the NLE ("not less or equal") predicate; lanes whose `k1` bit is
    // unset come back as 0 in the result mask.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33028
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise signed `<=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
}
33040
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // `<=` is the LE predicate; lanes whose `k1` bit is unset come back as 0
    // in the result mask.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33052
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `<=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
}
33064
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // `<=` is the LE predicate; lanes whose `k1` bit is unset come back as 0
    // in the result mask.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33076
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `<=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
}
33088
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `<=` is the LE predicate; lanes whose `k1` bit is unset come back as 0
    // in the result mask.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33100
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    // Lane-wise signed `>=` yields an all-ones/all-zeros lane per element;
    // `simd_bitmask` then packs one bit per lane into the returned mask.
    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
}
33112
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // `>=` is encoded as NLT (not-less-than); the callee zeroes result
    // bits whose corresponding k1 bit is not set.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33124
33125/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33126///
33127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
33128#[inline]
33129#[target_feature(enable = "avx512f,avx512vl")]
33130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33131#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33133pub const fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33134    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
33135}
33136
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // `>=` is encoded as NLT (not-less-than); the callee zeroes result
    // bits whose corresponding k1 bit is not set.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33148
33149/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33150///
33151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
33152#[inline]
33153#[target_feature(enable = "avx512f,avx512vl")]
33154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33157pub const fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33158    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
33159}
33160
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `>=` is encoded as NLT (not-less-than); the callee zeroes result
    // bits whose corresponding k1 bit is not set.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33172
33173/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33174///
33175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
33176#[inline]
33177#[target_feature(enable = "avx512f")]
33178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33181pub const fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33182    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
33183}
33184
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegate to the general predicate comparison with the EQ encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33196
33197/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33198///
33199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
33200#[inline]
33201#[target_feature(enable = "avx512f,avx512vl")]
33202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33203#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33205pub const fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33206    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
33207}
33208
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the general predicate comparison with the EQ encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33220
33221/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33222///
33223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
33224#[inline]
33225#[target_feature(enable = "avx512f,avx512vl")]
33226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33227#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33229pub const fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33230    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
33231}
33232
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the general predicate comparison with the EQ encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33244
33245/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33253pub const fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33254    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
33255}
33256
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    // Delegate to the general predicate comparison with the NE encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33268
33269/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33270///
33271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
33272#[inline]
33273#[target_feature(enable = "avx512f,avx512vl")]
33274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33277pub const fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33278    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
33279}
33280
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the general predicate comparison with the NE encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33292
33293/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33294///
33295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
33296#[inline]
33297#[target_feature(enable = "avx512f,avx512vl")]
33298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33299#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33301pub const fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33302    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
33303}
33304
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the general predicate comparison with the NE encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33316
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        // The predicate must fit in 3 bits (valid encodings are 0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Dispatch on the predicate encoding (same numbering as the
        // _MM_CMPINT_* constants used by the convenience wrappers).
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x16::ZERO,  // always-false predicate
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (>=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (>)
            _ => i32x16::splat(-1), // always-true predicate
        };
        // Pack the all-ones/all-zeros lane mask into one bit per lane.
        simd_bitmask(r)
    }
}
33347
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        // The predicate must fit in 3 bits (valid encodings are 0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // Expand the bitmask k1 to a full lane mask so it can be ANDed
        // with the lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x16::ZERO,                // always false, regardless of k1
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1,                          // always true: result is k1 itself
        };
        // Pack the masked lane mask back into one bit per lane.
        simd_bitmask(r)
    }
}
33380
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (valid encodings are 0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Dispatch on the predicate encoding (same numbering as the
        // _MM_CMPINT_* constants used by the convenience wrappers).
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x8::ZERO,   // always-false predicate
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (>=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (>)
            _ => i32x8::splat(-1), // always-true predicate
        };
        // Pack the all-ones/all-zeros lane mask into one bit per lane.
        simd_bitmask(r)
    }
}
33411
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (valid encodings are 0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Expand the bitmask k1 to a full lane mask so it can be ANDed
        // with the lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x8::ZERO,                 // always false, regardless of k1
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1,                          // always true: result is k1 itself
        };
        // Pack the masked lane mask back into one bit per lane.
        simd_bitmask(r)
    }
}
33444
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (valid encodings are 0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Dispatch on the predicate encoding (same numbering as the
        // _MM_CMPINT_* constants used by the convenience wrappers).
        let r = match IMM3 {
            0 => simd_eq(a, b), // _MM_CMPINT_EQ
            1 => simd_lt(a, b), // _MM_CMPINT_LT
            2 => simd_le(a, b), // _MM_CMPINT_LE
            3 => i32x4::ZERO,   // always-false predicate
            4 => simd_ne(a, b), // _MM_CMPINT_NE
            5 => simd_ge(a, b), // _MM_CMPINT_NLT (>=)
            6 => simd_gt(a, b), // _MM_CMPINT_NLE (>)
            _ => i32x4::splat(-1), // always-true predicate
        };
        // Pack the all-ones/all-zeros lane mask into one bit per lane.
        simd_bitmask(r)
    }
}
33472
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // The predicate must fit in 3 bits (valid encodings are 0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Expand the bitmask k1 to a full lane mask so it can be ANDed
        // with the lane-wise comparison result.
        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)), // _MM_CMPINT_EQ
            1 => simd_and(k1, simd_lt(a, b)), // _MM_CMPINT_LT
            2 => simd_and(k1, simd_le(a, b)), // _MM_CMPINT_LE
            3 => i32x4::ZERO,                 // always false, regardless of k1
            4 => simd_and(k1, simd_ne(a, b)), // _MM_CMPINT_NE
            5 => simd_and(k1, simd_ge(a, b)), // _MM_CMPINT_NLT (>=)
            6 => simd_and(k1, simd_gt(a, b)), // _MM_CMPINT_NLE (>)
            _ => k1,                          // always true: result is k1 itself
        };
        // Pack the masked lane mask back into one bit per lane.
        simd_bitmask(r)
    }
}
33505
33506/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33507///
33508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
33509#[inline]
33510#[target_feature(enable = "avx512f")]
33511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33512#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33514pub const fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33515    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
33516}
33517
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the general predicate comparison with the LT encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33529
33530/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33531///
33532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
33533#[inline]
33534#[target_feature(enable = "avx512f,avx512vl")]
33535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33536#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33538pub const fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33539    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
33540}
33541
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the general predicate comparison with the LT encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33553
33554/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33555///
33556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
33557#[inline]
33558#[target_feature(enable = "avx512f,avx512vl")]
33559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33562pub const fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33563    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
33564}
33565
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the general predicate comparison with the LT encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33577
33578/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33579///
33580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
33581#[inline]
33582#[target_feature(enable = "avx512f")]
33583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33586pub const fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33587    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
33588}
33589
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // `>` is encoded as NLE (not-less-or-equal); the callee zeroes result
    // bits whose corresponding k1 bit is not set.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33601
33602/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33603///
33604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
33605#[inline]
33606#[target_feature(enable = "avx512f,avx512vl")]
33607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33608#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33610pub const fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33611    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
33612}
33613
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // `>` is encoded as NLE (not-less-or-equal); the callee zeroes result
    // bits whose corresponding k1 bit is not set.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33625
33626/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33627///
33628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
33629#[inline]
33630#[target_feature(enable = "avx512f,avx512vl")]
33631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33632#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33634pub const fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33635    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
33636}
33637
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `>` is encoded as NLE (not-less-or-equal); the callee zeroes result
    // bits whose corresponding k1 bit is not set.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33649
33650/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33651///
33652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
33653#[inline]
33654#[target_feature(enable = "avx512f")]
33655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33656#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33658pub const fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33659    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
33660}
33661
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the general predicate comparison with the LE encoding;
    // the callee zeroes result bits whose corresponding k1 bit is not set.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33673
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `<=`; `simd_bitmask` packs the 4 lane results into
    // the low bits of the returned mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
}
33685
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic masked compare with the LE predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33697
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `<=`; `simd_bitmask` packs the 2 lane results into
    // the low bits of the returned mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
}
33709
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked compare with the LE predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33721
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise unsigned `>=`; `simd_bitmask` packs the 8 lane results into
    // the low bits of the returned mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
}
33733
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Unsigned `>=` is the NLT (not-less-than) predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33745
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise unsigned `>=`; `simd_bitmask` packs the 4 lane results into
    // the low bits of the returned mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
}
33757
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Unsigned `>=` is the NLT (not-less-than) predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33769
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `>=`; `simd_bitmask` packs the 2 lane results into
    // the low bits of the returned mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
}
33781
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Unsigned `>=` is the NLT (not-less-than) predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33793
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise equality; `simd_bitmask` packs the 8 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
}
33805
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic masked compare with the EQ predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33817
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise equality; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
}
33829
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic masked compare with the EQ predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33841
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise equality; `simd_bitmask` packs the 2 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
}
33853
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked compare with the EQ predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33865
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise inequality; `simd_bitmask` packs the 8 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
}
33877
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic masked compare with the NE predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33889
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise inequality; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
}
33901
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic masked compare with the NE predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33913
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise inequality; `simd_bitmask` packs the 2 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
}
33925
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked compare with the NE predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33937
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate, matching the `_MM_CMPINT_*`
        // encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>),
        // 7=TRUE.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO, // FALSE: all mask bits clear
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1), // TRUE (7): all mask bits set
        };
        simd_bitmask(r)
    }
}
33968
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        // Predicate encoding as in `_mm512_cmp_epu64_mask`: 0=EQ, 1=LT, 2=LE,
        // 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        // Expand the bit-mask `k1` into a per-lane all-ones/all-zeros vector
        // so the zeromask can be applied with a vector AND.
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO, // FALSE: result is all-zero regardless of k1
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1, // TRUE (7): result is exactly the zeromask
        };
        simd_bitmask(r)
    }
}
34001
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate, matching the `_MM_CMPINT_*`
        // encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>),
        // 7=TRUE.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO, // FALSE: all mask bits clear
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1), // TRUE (7): all mask bits set
        };
        simd_bitmask(r)
    }
}
34032
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // Predicate encoding as in `_mm256_cmp_epu64_mask`: 0=EQ, 1=LT, 2=LE,
        // 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        // Expand the bit-mask `k1` into a per-lane all-ones/all-zeros vector
        // so the zeromask can be applied with a vector AND.
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO, // FALSE: result is all-zero regardless of k1
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1, // TRUE (7): result is exactly the zeromask
        };
        simd_bitmask(r)
    }
}
34065
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate, matching the `_MM_CMPINT_*`
        // encoding: 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>),
        // 7=TRUE.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO, // FALSE: all mask bits clear
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1), // TRUE (7): all mask bits set
        };
        simd_bitmask(r)
    }
}
34093
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // Predicate encoding as in `_mm_cmp_epu64_mask`: 0=EQ, 1=LT, 2=LE,
        // 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        // Expand the bit-mask `k1` into a per-lane all-ones/all-zeros vector
        // so the zeromask can be applied with a vector AND.
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO, // FALSE: result is all-zero regardless of k1
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1, // TRUE (7): result is exactly the zeromask
        };
        simd_bitmask(r)
    }
}
34126
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `<`; `simd_bitmask` packs the 8 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
}
34138
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic masked signed compare with the LT predicate.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
34150
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `<`; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
}
34162
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic masked signed compare with the LT predicate.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
34174
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `<`; `simd_bitmask` packs the 2 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
}
34186
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked signed compare with the LT predicate.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
34198
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `>`; `simd_bitmask` packs the 8 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
}
34210
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Signed `>` is the NLE (not-less-or-equal) predicate.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
34222
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `>`; `simd_bitmask` packs the 4 lane results into the
    // low bits of the returned mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}
34234
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // greater-than == not-less-or-equal, so delegate with predicate
    // `_MM_CMPINT_NLE`; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
34246
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `>` over the two i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 2 bits of the mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
}
34258
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // greater-than == not-less-or-equal, so delegate with predicate
    // `_MM_CMPINT_NLE`; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
34270
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `<=` over the eight i64 lanes; `simd_bitmask` packs the
    // per-lane results into the 8-bit mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
}
34282
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_LE`; the generic helper applies
    // zeromask `k1`.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
34294
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `<=` over the four i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 4 bits of the mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
}
34306
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_LE`; the generic helper applies
    // zeromask `k1`.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
34318
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `<=` over the two i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 2 bits of the mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
}
34330
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_LE`; the generic helper applies
    // zeromask `k1`.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
34342
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise signed `>=` over the eight i64 lanes; `simd_bitmask` packs the
    // per-lane results into the 8-bit mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
}
34354
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // greater-or-equal == not-less-than, so delegate with predicate
    // `_MM_CMPINT_NLT`; the generic helper applies zeromask `k1`.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
34366
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise signed `>=` over the four i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 4 bits of the mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
}
34378
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // greater-or-equal == not-less-than, so delegate with predicate
    // `_MM_CMPINT_NLT`; the generic helper applies zeromask `k1`.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
34390
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `>=` over the two i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 2 bits of the mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
}
34402
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // greater-or-equal == not-less-than, so delegate with predicate
    // `_MM_CMPINT_NLT`; the generic helper applies zeromask `k1`.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
34414
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise `==` over the eight i64 lanes; `simd_bitmask` packs the
    // per-lane results into the 8-bit mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
}
34426
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_EQ`; the generic helper applies
    // zeromask `k1`.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
34438
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise `==` over the four i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 4 bits of the mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}
34450
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_EQ`; the generic helper applies
    // zeromask `k1`.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
34462
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `==` over the two i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 2 bits of the mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
}
34474
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_EQ`; the generic helper applies
    // zeromask `k1`.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
34486
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise `!=` over the eight i64 lanes; `simd_bitmask` packs the
    // per-lane results into the 8-bit mask.
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
}
34498
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_NE`; the generic helper applies
    // zeromask `k1`.
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34510
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    // Lane-wise `!=` over the four i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 4 bits of the mask.
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
}
34522
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_NE`; the generic helper applies
    // zeromask `k1`.
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34534
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `!=` over the two i64 lanes; `simd_bitmask` packs the
    // per-lane results into the low 2 bits of the mask.
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
}
34546
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate with predicate `_MM_CMPINT_NE`; the generic helper applies
    // zeromask `k1`.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34558
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        // Map the predicate to the corresponding lane-wise comparison;
        // predicates 3 and 7 ignore the operands entirely.
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO, // always-false predicate
            4 => simd_ne(a, b),
            5 => simd_ge(a, b), // not-less-than
            6 => simd_gt(a, b), // not-less-or-equal
            _ => i64x8::splat(-1), // 7: always-true predicate
        };
        simd_bitmask(r)
    }
}
34589
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        // Expand the bitmask into an all-ones/all-zeros lane vector so the
        // zeromask can be ANDed with each lane's comparison result.
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO, // always-false predicate: result is 0 regardless of k1
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)), // not-less-than
            6 => simd_and(k1, simd_gt(a, b)), // not-less-or-equal
            _ => k1, // 7: always-true predicate, so the result is k1 itself
        };
        simd_bitmask(r)
    }
}
34622
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        // Map the predicate to the corresponding lane-wise comparison;
        // predicates 3 and 7 ignore the operands entirely.
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO, // always-false predicate
            4 => simd_ne(a, b),
            5 => simd_ge(a, b), // not-less-than
            6 => simd_gt(a, b), // not-less-or-equal
            _ => i64x4::splat(-1), // 7: always-true predicate
        };
        simd_bitmask(r)
    }
}
34653
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        // Expand the bitmask into an all-ones/all-zeros lane vector so the
        // zeromask can be ANDed with each lane's comparison result.
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO, // always-false predicate: result is 0 regardless of k1
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)), // not-less-than
            6 => simd_and(k1, simd_gt(a, b)), // not-less-or-equal
            _ => k1, // 7: always-true predicate, so the result is k1 itself
        };
        simd_bitmask(r)
    }
}
34686
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        // Map the predicate to the corresponding lane-wise comparison;
        // predicates 3 and 7 ignore the operands entirely.
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO, // always-false predicate
            4 => simd_ne(a, b),
            5 => simd_ge(a, b), // not-less-than
            6 => simd_gt(a, b), // not-less-or-equal
            _ => i64x2::splat(-1), // 7: always-true predicate
        };
        simd_bitmask(r)
    }
}
34714
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7).
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        // Expand the bitmask into an all-ones/all-zeros lane vector so the
        // zeromask can be ANDed with each lane's comparison result.
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO, // always-false predicate: result is 0 regardless of k1
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)), // not-less-than
            6 => simd_and(k1, simd_gt(a, b)), // not-less-or-equal
            _ => k1, // 7: always-true predicate, so the result is k1 itself
        };
        simd_bitmask(r)
    }
}
34747
/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
    // Sum all 16 i32 lanes, starting from an accumulator of 0.
    unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) }
}
34758
/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    // Inactive lanes are replaced with 0 first, so they contribute nothing
    // to the sum.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) }
}
34769
/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
    // Sum all 8 i64 lanes, starting from an accumulator of 0.
    unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) }
}
34780
/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    // Inactive lanes are replaced with 0 first, so they contribute nothing
    // to the sum.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) }
}
34791
/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    unsafe {
        // Pairwise tree reduction: 16 lanes -> 8 -> 4 -> 2 -> 1.
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        // Add the two 128-bit halves, then the two 64-bit halves, and finally
        // the remaining two scalar lanes.
        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
    }
}
34811
/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
/// Elements whose mask bit is not set contribute +0.0 (the additive identity) to the sum.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    // Zero out inactive lanes, then reuse the unmasked tree reduction.
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
}
34822
/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// The sum is computed as a pairwise (tree) reduction (512 -> 256 -> 128 bits,
/// then the final two lanes), so floating-point rounding may differ from a
/// strict left-to-right summation of the lanes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
    unsafe {
        // Fold 512 bits into 256: add the low and high 256-bit halves.
        let a = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        // Fold 256 bits into 128, then sum the two remaining lanes.
        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
    }
}
34840
/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
/// Elements whose mask bit is not set contribute +0.0 (the additive identity) to the sum.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    // Zero out inactive lanes, then reuse the unmasked tree reduction.
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
}
34851
/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
    // Ordered reduction over the sixteen i32 lanes, starting from 1.
    unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) }
}
34862
/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
/// Elements whose mask bit is not set contribute 1 (the multiplicative identity) to the product.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        // Replace inactive lanes with 1, then perform an ordered reduction starting from 1.
        simd_reduce_mul_ordered(
            simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
            1,
        )
    }
}
34878
/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
    // Ordered reduction over the eight i64 lanes, starting from 1.
    unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) }
}
34889
/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
/// Elements whose mask bit is not set contribute 1 (the multiplicative identity) to the product.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe {
        // Replace inactive lanes with 1, then perform an ordered reduction starting from 1.
        simd_reduce_mul_ordered(
            simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
            1,
        )
    }
}
34905
/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// The product is computed as a pairwise (tree) reduction (512 -> 256 -> 128 bits,
/// then within the final vector), so floating-point rounding may differ from a
/// strict left-to-right product of the lanes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        // Fold 512 bits into 256: multiply the low and high 256-bit halves.
        let a = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        // Fold 256 bits into 128: multiply the low and high 128-bit lanes.
        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        // Multiply the upper pair onto the lower pair, then combine the last two lanes.
        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
    }
}
34925
/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
/// Elements whose mask bit is not set contribute 1.0 (the multiplicative identity) to the product.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    // Replace inactive lanes with 1.0, then reuse the unmasked tree reduction.
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
}
34936
/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// The product is computed as a pairwise (tree) reduction (512 -> 256 -> 128 bits,
/// then the final two lanes), so floating-point rounding may differ from a
/// strict left-to-right product of the lanes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
    unsafe {
        // Fold 512 bits into 256: multiply the low and high 256-bit halves.
        let a = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        // Fold 256 bits into 128, then combine the two remaining lanes.
        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
    }
}
34954
/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
/// Elements whose mask bit is not set contribute 1.0 (the multiplicative identity) to the product.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    // Replace inactive lanes with 1.0, then reuse the unmasked tree reduction.
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
}
34965
/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
    // Horizontal signed maximum over the sixteen i32 lanes.
    unsafe { simd_reduce_max(a.as_i32x16()) }
}
34976
/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
/// Elements whose mask bit is not set are treated as `i32::MIN` (the identity for signed maximum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
        ))
    }
}
34993
/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
    // Horizontal signed maximum over the eight i64 lanes.
    unsafe { simd_reduce_max(a.as_i64x8()) }
}
35004
/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
/// Elements whose mask bit is not set are treated as `i64::MIN` (the identity for signed maximum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
}
35015
/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
    // Horizontal unsigned maximum over the sixteen u32 lanes.
    unsafe { simd_reduce_max(a.as_u32x16()) }
}
35026
/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
/// Elements whose mask bit is not set are treated as 0 (the identity for unsigned maximum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
}
35037
/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
    // Horizontal unsigned maximum over the eight u64 lanes.
    unsafe { simd_reduce_max(a.as_u64x8()) }
}
35048
/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
/// Elements whose mask bit is not set are treated as 0 (the identity for unsigned maximum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
}
35059
/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// The maximum is computed as a pairwise (tree) reduction: 512 -> 256 -> 128 bits,
/// then down to the final scalar.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
    unsafe {
        // Fold 512 bits into 256: max of the low and high 256-bit halves
        // (simd_shuffle is used because the 256-bit extract is in AVX512DQ).
        let a = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        // Fold 256 bits into 128, then fold the upper pair onto the lower pair.
        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        // movehdup brings lane 1 into lane 0; max_ss combines the last two candidates.
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
    }
}
35077
/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
/// Elements whose mask bit is not set are treated as `f32::MIN` (the smallest finite `f32`, not negative infinity).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
}
35087
/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// The maximum is computed as a pairwise (tree) reduction: 512 -> 256 -> 128 bits,
/// then down to the final scalar.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
    unsafe {
        // Fold 512 bits into 256: max of the low and high 256-bit halves.
        let a = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        // Fold 256 bits into 128, then combine the two remaining lanes via max_sd.
        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}
35104
/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
/// Elements whose mask bit is not set are treated as `f64::MIN` (the smallest finite `f64`, not negative infinity).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
}
35114
/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
    // Horizontal signed minimum over the sixteen i32 lanes.
    unsafe { simd_reduce_min(a.as_i32x16()) }
}
35125
/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
/// Elements whose mask bit is not set are treated as `i32::MAX` (the identity for signed minimum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
        ))
    }
}
35142
/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
    // Horizontal signed minimum over the eight i64 lanes.
    unsafe { simd_reduce_min(a.as_i64x8()) }
}
35153
/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
/// Elements whose mask bit is not set are treated as `i64::MAX` (the identity for signed minimum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
}
35164
/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
    // Horizontal unsigned minimum over the sixteen u32 lanes.
    unsafe { simd_reduce_min(a.as_u32x16()) }
}
35175
/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
/// Elements whose mask bit is not set are treated as `u32::MAX` (the identity for unsigned minimum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
        ))
    }
}
35192
/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
    // Horizontal unsigned minimum over the eight u64 lanes.
    unsafe { simd_reduce_min(a.as_u64x8()) }
}
35203
/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
/// Elements whose mask bit is not set are treated as `u64::MAX` (the identity for unsigned minimum).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
}
35214
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// The minimum is computed as a pairwise (tree) reduction: 512 -> 256 -> 128 bits,
/// then down to the final scalar.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
    unsafe {
        // Fold 512 bits into 256: min of the low and high 256-bit halves
        // (simd_shuffle is used because the 256-bit extract is in AVX512DQ).
        let a = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        // Fold 256 bits into 128, then fold the upper pair onto the lower pair.
        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        // movehdup brings lane 1 into lane 0; min_ss combines the last two candidates.
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
    }
}
35232
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
/// Elements whose mask bit is not set are treated as `f32::MAX` (the largest finite `f32`, not positive infinity).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
}
35242
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// The minimum is computed as a pairwise (tree) reduction: 512 -> 256 -> 128 bits,
/// then down to the final scalar.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
    unsafe {
        // Fold 512 bits into 256: min of the low and high 256-bit halves.
        let a = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        // Fold 256 bits into 128, then combine the two remaining lanes via min_sd.
        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}
35259
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
/// Elements whose mask bit is not set are treated as `f64::MAX` (the largest finite `f64`, not positive infinity).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
}
35269
/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
    // Horizontal bitwise AND over the sixteen i32 lanes.
    unsafe { simd_reduce_and(a.as_i32x16()) }
}
35280
/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
/// Elements whose mask bit is not set are treated as all-ones (-1, the identity for AND).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
}
35291
/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
    // Horizontal bitwise AND over the eight i64 lanes.
    unsafe { simd_reduce_and(a.as_i64x8()) }
}
35302
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
/// Elements whose mask bit is not set are treated as all-ones (-1, the identity for AND).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
}
35313
/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
    // Horizontal bitwise OR over the sixteen i32 lanes.
    unsafe { simd_reduce_or(a.as_i32x16()) }
}
35324
/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
/// Elements whose mask bit is not set are treated as 0 (the identity for OR).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
}
35335
/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
    // Horizontal bitwise OR over the eight i64 lanes.
    unsafe { simd_reduce_or(a.as_i64x8()) }
}
35346
/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
/// Elements whose mask bit is not set are treated as 0 (the identity for OR).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
}
35357
/// Returns vector of type `__m512d` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_pd() -> __m512d {
    // All-zero bit patterns are valid for SIMD vector types, so zeroed() is sound here.
    unsafe { const { mem::zeroed() } }
}
35372
/// Returns vector of type `__m512` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_ps() -> __m512 {
    // All-zero bit patterns are valid for SIMD vector types, so zeroed() is sound here.
    unsafe { const { mem::zeroed() } }
}
35387
/// Returns vector of type `__m512i` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_epi32() -> __m512i {
    // An all-zero vector is one valid choice of "indeterminate" contents; the
    // `const` block makes it a compile-time constant rather than a runtime call.
    unsafe { const { mem::zeroed() } }
}
35402
/// Returns vector of type `__m512` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined() -> __m512 {
    // An all-zero vector is one valid choice of "indeterminate" contents; the
    // `const` block makes it a compile-time constant rather than a runtime call.
    unsafe { const { mem::zeroed() } }
}
35417
35418/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35419///
35420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
35421#[inline]
35422#[target_feature(enable = "avx512f")]
35423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35424#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35426pub const unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
35427    ptr::read_unaligned(mem_addr as *const __m512i)
35428}
35429
35430/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35431///
35432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
35433#[inline]
35434#[target_feature(enable = "avx512f,avx512vl")]
35435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35436#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35438pub const unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
35439    ptr::read_unaligned(mem_addr as *const __m256i)
35440}
35441
35442/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35443///
35444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
35445#[inline]
35446#[target_feature(enable = "avx512f,avx512vl")]
35447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35448#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35450pub const unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
35451    ptr::read_unaligned(mem_addr as *const __m128i)
35452}
35453
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
}
35464
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}
35475
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}
35486
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
}
35497
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}
35508
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}
35519
/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
}
35530
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}
35541
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}
35552
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
}
35563
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
}
35574
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
}
35585
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
}
35596
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
}
35607
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
}
35618
/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
}
35629
/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
}
35640
/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
}
35651
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
}
35662
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}
35673
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}
35684
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
}
35695
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}
35706
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}
35717
/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k);
}
35728
/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}
35739
/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 16-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}
35750
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
}
35761
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
}
35772
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
}
35783
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
}
35794
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
}
35805
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
}
35816
/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
}
35827
/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
}
35838
/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 8-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
}
35849
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 32-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
}
35860
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 32-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}
35871
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
///
/// # Safety
///
/// `mem_addr` must be valid for writes of the 32-bit elements whose
/// corresponding bit in `k` is set; no particular alignment is required.
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}
35882
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    // Signed-saturating counterpart of `_mm512_mask_cvtepi64_storeu_epi32`;
    // the intrinsic takes a byte pointer, hence the cast.
    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
}
35893
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    // 256-bit signed-saturating masked store; byte-pointer cast as above.
    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}
35904
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    // 128-bit signed-saturating masked store; byte-pointer cast as above.
    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}
35915
/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    // Unsigned-saturating counterpart of `_mm512_mask_cvtepi64_storeu_epi32`;
    // the intrinsic takes a byte pointer, hence the cast.
    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
}
35926
/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    // 256-bit unsigned-saturating masked store; byte-pointer cast as above.
    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}
35937
/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    // 128-bit unsigned-saturating masked store; byte-pointer cast as above.
    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}
35948
35949/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35950///
35951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
35952#[inline]
35953#[target_feature(enable = "avx512f")]
35954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35955#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35956#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35957pub const unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
35958    ptr::write_unaligned(mem_addr as *mut __m512i, a);
35959}
35960
35961/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35962///
35963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
35964#[inline]
35965#[target_feature(enable = "avx512f,avx512vl")]
35966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35967#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35969pub const unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
35970    ptr::write_unaligned(mem_addr as *mut __m256i, a);
35971}
35972
35973/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35974///
35975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
35976#[inline]
35977#[target_feature(enable = "avx512f,avx512vl")]
35978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35979#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35981pub const unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
35982    ptr::write_unaligned(mem_addr as *mut __m128i, a);
35983}
35984
35985/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35986///
35987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
35988#[inline]
35989#[target_feature(enable = "avx512f")]
35990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35991#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
35992#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35993pub const unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
35994    ptr::read_unaligned(mem_addr as *const __m512i)
35995}
35996
35997/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35998///
35999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
36000#[inline]
36001#[target_feature(enable = "avx512f,avx512vl")]
36002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36003#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36004#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36005pub const unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
36006    ptr::read_unaligned(mem_addr as *const __m256i)
36007}
36008
36009/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
36010///
36011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
36012#[inline]
36013#[target_feature(enable = "avx512f,avx512vl")]
36014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36015#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36017pub const unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
36018    ptr::read_unaligned(mem_addr as *const __m128i)
36019}
36020
36021/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36022///
36023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
36024#[inline]
36025#[target_feature(enable = "avx512f")]
36026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36027#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36029pub const unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
36030    ptr::write_unaligned(mem_addr as *mut __m512i, a);
36031}
36032
36033/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36034///
36035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
36036#[inline]
36037#[target_feature(enable = "avx512f,avx512vl")]
36038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36039#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36040#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36041pub const unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
36042    ptr::write_unaligned(mem_addr as *mut __m256i, a);
36043}
36044
36045/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36046///
36047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
36048#[inline]
36049#[target_feature(enable = "avx512f,avx512vl")]
36050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36051#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36052#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36053pub const unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
36054    ptr::write_unaligned(mem_addr as *mut __m128i, a);
36055}
36056
36057/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
36058///
36059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
36060#[inline]
36061#[target_feature(enable = "avx512f")]
36062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36063#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
36064#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36065pub const unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
36066    ptr::read_unaligned(mem_addr)
36067}
36068
36069/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
36070///
36071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
36072#[inline]
36073#[target_feature(enable = "avx512f")]
36074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36075#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
36076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36077pub const unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
36078    ptr::write_unaligned(mem_addr, a);
36079}
36080
36081/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
36082/// floating-point elements) from memory into result.
36083/// `mem_addr` does not need to be aligned on any particular boundary.
36084///
36085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
36086#[inline]
36087#[target_feature(enable = "avx512f")]
36088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36089#[cfg_attr(test, assert_instr(vmovups))]
36090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36091pub const unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
36092    ptr::read_unaligned(mem_addr as *const __m512d)
36093}
36094
36095/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
36096/// floating-point elements) from `a` into memory.
36097/// `mem_addr` does not need to be aligned on any particular boundary.
36098///
36099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
36100#[inline]
36101#[target_feature(enable = "avx512f")]
36102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36103#[cfg_attr(test, assert_instr(vmovups))]
36104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36105pub const unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
36106    ptr::write_unaligned(mem_addr as *mut __m512d, a);
36107}
36108
36109/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
36110/// floating-point elements) from memory into result.
36111/// `mem_addr` does not need to be aligned on any particular boundary.
36112///
36113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
36114#[inline]
36115#[target_feature(enable = "avx512f")]
36116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36117#[cfg_attr(test, assert_instr(vmovups))]
36118#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36119pub const unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
36120    ptr::read_unaligned(mem_addr as *const __m512)
36121}
36122
36123/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
36124/// floating-point elements) from `a` into memory.
36125/// `mem_addr` does not need to be aligned on any particular boundary.
36126///
36127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
36128#[inline]
36129#[target_feature(enable = "avx512f")]
36130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36131#[cfg_attr(test, assert_instr(vmovups))]
36132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36133pub const unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
36134    ptr::write_unaligned(mem_addr as *mut __m512, a);
36135}
36136
36137/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36138///
36139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
36140#[inline]
36141#[target_feature(enable = "avx512f")]
36142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36143#[cfg_attr(
36144    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36145    assert_instr(vmovaps)
36146)] //should be vmovdqa32
36147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36148pub const unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
36149    ptr::read(mem_addr)
36150}
36151
36152/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36153///
36154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
36155#[inline]
36156#[target_feature(enable = "avx512f")]
36157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36158#[cfg_attr(
36159    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36160    assert_instr(vmovaps)
36161)] //should be vmovdqa32
36162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36163pub const unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
36164    ptr::write(mem_addr, a);
36165}
36166
36167/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36168///
36169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
36170#[inline]
36171#[target_feature(enable = "avx512f")]
36172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36173#[cfg_attr(
36174    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36175    assert_instr(vmovaps)
36176)] //should be vmovdqa32
36177#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36178pub const unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
36179    ptr::read(mem_addr as *const __m512i)
36180}
36181
36182/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36183///
36184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
36185#[inline]
36186#[target_feature(enable = "avx512f,avx512vl")]
36187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36188#[cfg_attr(
36189    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36190    assert_instr(vmovaps)
36191)] //should be vmovdqa32
36192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36193pub const unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
36194    ptr::read(mem_addr as *const __m256i)
36195}
36196
36197/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36198///
36199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
36200#[inline]
36201#[target_feature(enable = "avx512f,avx512vl")]
36202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36203#[cfg_attr(
36204    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36205    assert_instr(vmovaps)
36206)] //should be vmovdqa32
36207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36208pub const unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
36209    ptr::read(mem_addr as *const __m128i)
36210}
36211
36212/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36213///
36214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
36215#[inline]
36216#[target_feature(enable = "avx512f")]
36217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36218#[cfg_attr(
36219    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36220    assert_instr(vmovaps)
36221)] //should be vmovdqa32
36222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36223pub const unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
36224    ptr::write(mem_addr as *mut __m512i, a);
36225}
36226
36227/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36228///
36229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
36230#[inline]
36231#[target_feature(enable = "avx512f,avx512vl")]
36232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36233#[cfg_attr(
36234    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36235    assert_instr(vmovaps)
36236)] //should be vmovdqa32
36237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36238pub const unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
36239    ptr::write(mem_addr as *mut __m256i, a);
36240}
36241
36242/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36243///
36244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
36245#[inline]
36246#[target_feature(enable = "avx512f,avx512vl")]
36247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36248#[cfg_attr(
36249    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36250    assert_instr(vmovaps)
36251)] //should be vmovdqa32
36252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36253pub const unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
36254    ptr::write(mem_addr as *mut __m128i, a);
36255}
36256
36257/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36258///
36259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
36260#[inline]
36261#[target_feature(enable = "avx512f")]
36262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36263#[cfg_attr(
36264    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36265    assert_instr(vmovaps)
36266)] //should be vmovdqa64
36267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36268pub const unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
36269    ptr::read(mem_addr as *const __m512i)
36270}
36271
36272/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36273///
36274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
36275#[inline]
36276#[target_feature(enable = "avx512f,avx512vl")]
36277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36278#[cfg_attr(
36279    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36280    assert_instr(vmovaps)
36281)] //should be vmovdqa64
36282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36283pub const unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
36284    ptr::read(mem_addr as *const __m256i)
36285}
36286
36287/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36288///
36289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
36290#[inline]
36291#[target_feature(enable = "avx512f,avx512vl")]
36292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36293#[cfg_attr(
36294    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36295    assert_instr(vmovaps)
36296)] //should be vmovdqa64
36297#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36298pub const unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
36299    ptr::read(mem_addr as *const __m128i)
36300}
36301
36302/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36308#[cfg_attr(
36309    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36310    assert_instr(vmovaps)
36311)] //should be vmovdqa64
36312#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36313pub const unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
36314    ptr::write(mem_addr as *mut __m512i, a);
36315}
36316
36317/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36318///
36319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
36320#[inline]
36321#[target_feature(enable = "avx512f,avx512vl")]
36322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36323#[cfg_attr(
36324    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36325    assert_instr(vmovaps)
36326)] //should be vmovdqa64
36327#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36328pub const unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
36329    ptr::write(mem_addr as *mut __m256i, a);
36330}
36331
36332/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36333///
36334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
36335#[inline]
36336#[target_feature(enable = "avx512f,avx512vl")]
36337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36338#[cfg_attr(
36339    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36340    assert_instr(vmovaps)
36341)] //should be vmovdqa64
36342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36343pub const unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
36344    ptr::write(mem_addr as *mut __m128i, a);
36345}
36346
36347/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36348///
36349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
36350#[inline]
36351#[target_feature(enable = "avx512f")]
36352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36353#[cfg_attr(
36354    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36355    assert_instr(vmovaps)
36356)]
36357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36358pub const unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
36359    ptr::read(mem_addr as *const __m512)
36360}
36361
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
}
36376
36377/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36378///
36379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
36380#[inline]
36381#[target_feature(enable = "avx512f")]
36382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36383#[cfg_attr(
36384    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36385    assert_instr(vmovaps)
36386)] //should be vmovapd
36387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36388pub const unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
36389    ptr::read(mem_addr as *const __m512d)
36390}
36391
36392/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36393///
36394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
36395#[inline]
36396#[target_feature(enable = "avx512f")]
36397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36398#[cfg_attr(
36399    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36400    assert_instr(vmovaps)
36401)] //should be vmovapd
36402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36403pub const unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
36404    ptr::write(mem_addr as *mut __m512d, a);
36405}
36406
/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    // Expand the bitmask `k` into a per-lane vector mask (all-ones where the
    // bit is set, zero otherwise), then perform an unaligned masked load with
    // `src` supplying the lanes that are not loaded.
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
}
36425
/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    // Zero-masking is just the writemask variant with an all-zero source.
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}
36439
/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    // Expand the 8-bit writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
}
36458
/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}
36472
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_ps(
    src: __m512,
    k: __mmask16,
    mem_addr: *const f32,
) -> __m512 {
    // Expand the 16-bit writemask into a per-lane vector mask; the mask is
    // integer-typed (i32x16) even though the data lanes are f32.
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
}
36491
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
}
36505
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    // Expand the 8-bit writemask into a per-lane vector mask; the mask is
    // integer-typed (i64x8) even though the data lanes are f64.
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
}
36524
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
}
36538
/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    // Expand the writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
}
36557
/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}
36571
/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    // Expand the writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
}
36590
/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}
36604
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    // Expand the writemask into a per-lane vector mask; the mask is
    // integer-typed (i32x8) even though the data lanes are f32.
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
}
36619
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
}
36633
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    // Expand the writemask into a per-lane vector mask; the mask is
    // integer-typed (i64x4) even though the data lanes are f64.
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
}
36652
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
}
36666
/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    // Expand the writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
}
36685
/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
}
36699
/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    // Expand the writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
}
36718
/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
}
36732
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Expand the writemask into a per-lane vector mask; the mask is
    // integer-typed (i32x4) even though the data lanes are f32.
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
}
36747
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
}
36761
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Expand the writemask into a per-lane vector mask; the mask is
    // integer-typed (i64x2) even though the data lanes are f64.
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    // NOTE(review): presumably lanes with a zero mask are not read from
    // memory — confirm against the `simd_masked_load` contract.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
}
36776
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
}
36790
/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    // Expand the 16-bit writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    // `SimdAlign::Vector` (unlike the `loadu` variants' `Unaligned`) encodes
    // the full-vector alignment requirement documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
}
36809
/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
}
36823
/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    // Expand the 8-bit writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    // `SimdAlign::Vector` encodes the full-vector alignment requirement
    // documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
}
36842
/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
}
36856
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    // Expand the 16-bit writemask into a per-lane vector mask; the mask is
    // integer-typed (i32x16) even though the data lanes are f32.
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    // `SimdAlign::Vector` encodes the full-vector alignment requirement
    // documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
}
36871
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
}
36885
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    // Expand the 8-bit writemask into a per-lane vector mask; the mask is
    // integer-typed (i64x8) even though the data lanes are f64.
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    // `SimdAlign::Vector` encodes the full-vector alignment requirement
    // documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
}
36904
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
}
36918
/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    // Expand the writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    // `SimdAlign::Vector` encodes the full-vector alignment requirement
    // documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
}
36937
/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
}
36951
/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    // Expand the writemask into a per-lane vector mask: !0 (all ones)
    // selects the element loaded from memory, 0 keeps the lane from `src`.
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    // `SimdAlign::Vector` encodes the full-vector alignment requirement
    // documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
}
36970
/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
}
36984
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    // Expand the writemask into a per-lane vector mask; the mask is
    // integer-typed (i32x8) even though the data lanes are f32.
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    // `SimdAlign::Vector` encodes the full-vector alignment requirement
    // documented above.
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
}
36999
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    // Delegate to the writemask form with an all-zeros source, so lanes
    // whose mask bit is clear come out as 0.0.
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
}
37013
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    // Widen bitmask `k` into a per-lane selector: !0 loads the lane from memory,
    // 0 keeps the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    // Vector-aligned masked load (hence the 32-byte alignment requirement above).
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
}
37032
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    // Zero-masking is the merge-masking form with an all-zero source vector.
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
}
37046
/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    // Widen bitmask `k` into a per-lane selector: !0 loads the lane from memory,
    // 0 keeps the corresponding lane of `src` (only the low 4 bits of k matter here).
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    // Vector-aligned masked load (hence the 16-byte alignment requirement above).
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
}
37065
/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    // Zero-masking is the merge-masking form with an all-zero source vector.
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
}
37079
/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    // Widen bitmask `k` into a per-lane selector: !0 loads the lane from memory,
    // 0 keeps the corresponding lane of `src` (only the low 2 bits of k matter here).
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    // Vector-aligned masked load (hence the 16-byte alignment requirement above).
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
}
37098
/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    // Zero-masking is the merge-masking form with an all-zero source vector.
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
}
37112
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Widen bitmask `k` into a per-lane selector: !0 loads the lane from memory,
    // 0 keeps the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    // Vector-aligned masked load (hence the 16-byte alignment requirement above).
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
}
37127
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Zero-masking is the merge-masking form with an all-zero source vector.
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
}
37141
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Widen bitmask `k` into a per-lane selector: !0 loads the lane from memory,
    // 0 keeps the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    // Vector-aligned masked load (hence the 16-byte alignment requirement above).
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
}
37156
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Zero-masking is the merge-masking form with an all-zero source vector.
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
}
37170
/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Seed dst with `src` so that, when mask bit 0 is clear, the merge-masked
    // vmovss keeps src's low element (the doc above states the upper 3 elements
    // come out zeroed by the masked memory-form load).
    let mut dst: __m128 = src;
    asm!(
        // `vpl!` is a crate-local macro that completes the instruction template
        // with the memory operand (presumably appending ", [{p}]" — defined
        // elsewhere in this crate).
        vpl!("vmovss {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // pure/readonly: the result depends only on the operands and the
        // pointed-to memory; no flags or stack are touched.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37192
/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
    // No source operand: the `{z}` (zeroing) modifier makes the instruction
    // write a full result, so `dst` can be a plain `out` register.
    let mut dst: __m128;
    asm!(
        // `vpl!` is a crate-local macro that completes the instruction template
        // with the memory operand (presumably appending ", [{p}]" — defined
        // elsewhere in this crate). `{{z}}` selects zero-masking.
        vpl!("vmovss {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        // pure/readonly: the result depends only on the operands and the
        // pointed-to memory; no flags or stack are touched.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37214
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Seed dst with `src` so that, when mask bit 0 is clear, the merge-masked
    // vmovsd keeps src's low element (per the doc above, the upper element
    // comes out zeroed by the masked memory-form load).
    let mut dst: __m128d = src;
    asm!(
        // `vpl!` is a crate-local macro that completes the instruction template
        // with the memory operand (presumably appending ", [{p}]" — defined
        // elsewhere in this crate).
        vpl!("vmovsd {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // pure/readonly: the result depends only on the operands and the
        // pointed-to memory; no flags or stack are touched.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37236
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
/// may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    // No source operand: the `{z}` (zeroing) modifier makes the instruction
    // write a full result, so `dst` can be a plain `out` register.
    let mut dst: __m128d;
    asm!(
        // `vpl!` is a crate-local macro that completes the instruction template
        // with the memory operand (presumably appending ", [{p}]" — defined
        // elsewhere in this crate). `{{z}}` selects zero-masking.
        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        // pure/readonly: the result depends only on the operands and the
        // pointed-to memory; no flags or stack are touched.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37258
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
}
37272
/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
}
37286
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
}
37300
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
}
37314
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
}
37328
/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
}
37342
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
}
37356
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
}
37370
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
}
37384
/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
}
37398
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
}
37412
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are
    // not written to memory. `Unaligned` matches the no-alignment doc above.
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
}
37426
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 64-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
}
37440
/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 64-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
}
37454
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 64-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
}
37468
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 64-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
}
37482
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 32-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
}
37496
/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 32-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
}
37510
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 32-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
}
37524
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 32-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
}
37538
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 16-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
}
37552
/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
    // Widen the bitmask into a per-lane selector; lanes with a clear bit are not
    // written. `Vector` alignment matches the 16-byte requirement in the doc above.
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
}
37566
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
    // The lane mask is built as i32 (same width as f32 lanes): each of the low
    // 4 mask bits becomes an all-ones/all-zeros lane selecting which f32
    // elements get written; unselected memory is left untouched.
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
}
37580
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
    // The lane mask is built as i64 (same width as f64 lanes): the low 2 mask
    // bits select which f64 elements get written; unselected memory is left
    // untouched.
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
}
37594
/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
    // Implemented with inline asm: emit `vmovss` with a k-register writemask
    // directly (only mask bit 0 is significant for a scalar store). The `vps!`
    // macro expands the store form of the mnemonic with the address operand.
    // `preserves_flags`: the masked move does not touch EFLAGS.
    asm!(
        vps!("vmovss", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
37612
/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
    // Implemented with inline asm: emit `vmovsd` with a k-register writemask
    // directly (only mask bit 0 is significant for a scalar store). The `vps!`
    // macro expands the store form of the mnemonic with the address operand.
    // `preserves_flags`: the masked move does not touch EFLAGS.
    asm!(
        vps!("vmovsd", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
37630
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    // Thin wrapper over the `vpexpandd` intrinsic: consecutive memory elements
    // are expanded into the lanes whose bit in `k` is set; clear-bit lanes keep
    // the corresponding element of `src`. No alignment requirement on mem_addr.
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
}
37645
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}
37656
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    // 256-bit `vpexpandd`: consecutive memory elements fill the set-bit lanes
    // of `k`; clear-bit lanes keep the corresponding element of `src`.
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
}
37671
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}
37682
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    // 128-bit `vpexpandd`: only the low 4 bits of `k` are meaningful here.
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
}
37697
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
}
37708
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    // Thin wrapper over the `vpexpandq` intrinsic: consecutive memory elements
    // are expanded into the lanes whose bit in `k` is set; clear-bit lanes keep
    // the corresponding element of `src`. No alignment requirement on mem_addr.
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
}
37723
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}
37734
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    // 256-bit `vpexpandq`: only the low 4 bits of `k` are meaningful here.
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
}
37749
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}
37760
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    // 128-bit `vpexpandq`: only the low 2 bits of `k` are meaningful here.
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
}
37775
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
}
37786
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_ps(
    src: __m512,
    k: __mmask16,
    mem_addr: *const f32,
) -> __m512 {
    // Thin wrapper over the `vexpandps` intrinsic: consecutive memory elements
    // are expanded into the lanes whose bit in `k` is set; clear-bit lanes keep
    // the corresponding element of `src`. No alignment requirement on mem_addr.
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
}
37801
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
}
37812
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    // 256-bit `vexpandps`: consecutive memory elements fill the set-bit lanes
    // of `k`; clear-bit lanes keep the corresponding element of `src`.
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
}
37823
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
}
37834
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    // 128-bit `vexpandps`: only the low 4 bits of `k` are meaningful here.
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
}
37845
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
}
37856
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    // Thin wrapper over the `vexpandpd` intrinsic: consecutive memory elements
    // are expanded into the lanes whose bit in `k` is set; clear-bit lanes keep
    // the corresponding element of `src`. No alignment requirement on mem_addr.
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
}
37871
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
}
37882
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    // 256-bit `vexpandpd`: only the low 4 bits of `k` are meaningful here.
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
}
37897
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
}
37908
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    // 128-bit `vexpandpd`: only the low 2 bits of `k` are meaningful here.
    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
}
37919
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Zero-masking is the write-masking form with an all-zeros source vector.
    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
}
37930
37931/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
37932///
37933/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
37934#[inline]
37935#[target_feature(enable = "avx512f")]
37936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37938pub const fn _mm512_setr_pd(
37939    e0: f64,
37940    e1: f64,
37941    e2: f64,
37942    e3: f64,
37943    e4: f64,
37944    e5: f64,
37945    e6: f64,
37946    e7: f64,
37947) -> __m512d {
37948    unsafe {
37949        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
37950        transmute(r)
37951    }
37952}
37953
37954/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
37955///
37956/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
37957#[inline]
37958#[target_feature(enable = "avx512f")]
37959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37961pub const fn _mm512_set_pd(
37962    e0: f64,
37963    e1: f64,
37964    e2: f64,
37965    e3: f64,
37966    e4: f64,
37967    e5: f64,
37968    e6: f64,
37969    e7: f64,
37970) -> __m512d {
37971    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
37972}
37973
37974/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37975///
37976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
37977#[inline]
37978#[target_feature(enable = "avx512f")]
37979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37980#[cfg_attr(test, assert_instr(vmovss))]
37981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37982pub const fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37983    unsafe {
37984        let extractsrc: f32 = simd_extract!(src, 0);
37985        let mut mov: f32 = extractsrc;
37986        if (k & 0b00000001) != 0 {
37987            mov = simd_extract!(b, 0);
37988        }
37989        simd_insert!(a, 0, mov)
37990    }
37991}
37992
37993/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37994///
37995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
37996#[inline]
37997#[target_feature(enable = "avx512f")]
37998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37999#[cfg_attr(test, assert_instr(vmovss))]
38000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38001pub const fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38002    unsafe {
38003        let mut mov: f32 = 0.;
38004        if (k & 0b00000001) != 0 {
38005            mov = simd_extract!(b, 0);
38006        }
38007        simd_insert!(a, 0, mov)
38008    }
38009}
38010
38011/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38012///
38013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
38014#[inline]
38015#[target_feature(enable = "avx512f")]
38016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38017#[cfg_attr(test, assert_instr(vmovsd))]
38018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38019pub const fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38020    unsafe {
38021        let extractsrc: f64 = simd_extract!(src, 0);
38022        let mut mov: f64 = extractsrc;
38023        if (k & 0b00000001) != 0 {
38024            mov = simd_extract!(b, 0);
38025        }
38026        simd_insert!(a, 0, mov)
38027    }
38028}
38029
38030/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38031///
38032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
38033#[inline]
38034#[target_feature(enable = "avx512f")]
38035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38036#[cfg_attr(test, assert_instr(vmovsd))]
38037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38038pub const fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38039    unsafe {
38040        let mut mov: f64 = 0.;
38041        if (k & 0b00000001) != 0 {
38042            mov = simd_extract!(b, 0);
38043        }
38044        simd_insert!(a, 0, mov)
38045    }
38046}
38047
38048/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38049///
38050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
38051#[inline]
38052#[target_feature(enable = "avx512f")]
38053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38054#[cfg_attr(test, assert_instr(vaddss))]
38055#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38056pub const fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38057    unsafe {
38058        let extractsrc: f32 = simd_extract!(src, 0);
38059        let mut add: f32 = extractsrc;
38060        if (k & 0b00000001) != 0 {
38061            let extracta: f32 = simd_extract!(a, 0);
38062            let extractb: f32 = simd_extract!(b, 0);
38063            add = extracta + extractb;
38064        }
38065        simd_insert!(a, 0, add)
38066    }
38067}
38068
38069/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38070///
38071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
38072#[inline]
38073#[target_feature(enable = "avx512f")]
38074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38075#[cfg_attr(test, assert_instr(vaddss))]
38076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38077pub const fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38078    unsafe {
38079        let mut add: f32 = 0.;
38080        if (k & 0b00000001) != 0 {
38081            let extracta: f32 = simd_extract!(a, 0);
38082            let extractb: f32 = simd_extract!(b, 0);
38083            add = extracta + extractb;
38084        }
38085        simd_insert!(a, 0, add)
38086    }
38087}
38088
38089/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38090///
38091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
38092#[inline]
38093#[target_feature(enable = "avx512f")]
38094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38095#[cfg_attr(test, assert_instr(vaddsd))]
38096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38097pub const fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38098    unsafe {
38099        let extractsrc: f64 = simd_extract!(src, 0);
38100        let mut add: f64 = extractsrc;
38101        if (k & 0b00000001) != 0 {
38102            let extracta: f64 = simd_extract!(a, 0);
38103            let extractb: f64 = simd_extract!(b, 0);
38104            add = extracta + extractb;
38105        }
38106        simd_insert!(a, 0, add)
38107    }
38108}
38109
38110/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38111///
38112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
38113#[inline]
38114#[target_feature(enable = "avx512f")]
38115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38116#[cfg_attr(test, assert_instr(vaddsd))]
38117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38118pub const fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38119    unsafe {
38120        let mut add: f64 = 0.;
38121        if (k & 0b00000001) != 0 {
38122            let extracta: f64 = simd_extract!(a, 0);
38123            let extractb: f64 = simd_extract!(b, 0);
38124            add = extracta + extractb;
38125        }
38126        simd_insert!(a, 0, add)
38127    }
38128}
38129
38130/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38131///
38132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
38133#[inline]
38134#[target_feature(enable = "avx512f")]
38135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38136#[cfg_attr(test, assert_instr(vsubss))]
38137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38138pub const fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38139    unsafe {
38140        let extractsrc: f32 = simd_extract!(src, 0);
38141        let mut add: f32 = extractsrc;
38142        if (k & 0b00000001) != 0 {
38143            let extracta: f32 = simd_extract!(a, 0);
38144            let extractb: f32 = simd_extract!(b, 0);
38145            add = extracta - extractb;
38146        }
38147        simd_insert!(a, 0, add)
38148    }
38149}
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38152///
38153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
38154#[inline]
38155#[target_feature(enable = "avx512f")]
38156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38157#[cfg_attr(test, assert_instr(vsubss))]
38158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38159pub const fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38160    unsafe {
38161        let mut add: f32 = 0.;
38162        if (k & 0b00000001) != 0 {
38163            let extracta: f32 = simd_extract!(a, 0);
38164            let extractb: f32 = simd_extract!(b, 0);
38165            add = extracta - extractb;
38166        }
38167        simd_insert!(a, 0, add)
38168    }
38169}
38170
38171/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38172///
38173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
38174#[inline]
38175#[target_feature(enable = "avx512f")]
38176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38177#[cfg_attr(test, assert_instr(vsubsd))]
38178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38179pub const fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38180    unsafe {
38181        let extractsrc: f64 = simd_extract!(src, 0);
38182        let mut add: f64 = extractsrc;
38183        if (k & 0b00000001) != 0 {
38184            let extracta: f64 = simd_extract!(a, 0);
38185            let extractb: f64 = simd_extract!(b, 0);
38186            add = extracta - extractb;
38187        }
38188        simd_insert!(a, 0, add)
38189    }
38190}
38191
38192/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38193///
38194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
38195#[inline]
38196#[target_feature(enable = "avx512f")]
38197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38198#[cfg_attr(test, assert_instr(vsubsd))]
38199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38200pub const fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38201    unsafe {
38202        let mut add: f64 = 0.;
38203        if (k & 0b00000001) != 0 {
38204            let extracta: f64 = simd_extract!(a, 0);
38205            let extractb: f64 = simd_extract!(b, 0);
38206            add = extracta - extractb;
38207        }
38208        simd_insert!(a, 0, add)
38209    }
38210}
38211
38212/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38213///
38214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
38215#[inline]
38216#[target_feature(enable = "avx512f")]
38217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38218#[cfg_attr(test, assert_instr(vmulss))]
38219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38220pub const fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38221    unsafe {
38222        let extractsrc: f32 = simd_extract!(src, 0);
38223        let mut add: f32 = extractsrc;
38224        if (k & 0b00000001) != 0 {
38225            let extracta: f32 = simd_extract!(a, 0);
38226            let extractb: f32 = simd_extract!(b, 0);
38227            add = extracta * extractb;
38228        }
38229        simd_insert!(a, 0, add)
38230    }
38231}
38232
38233/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38234///
38235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
38236#[inline]
38237#[target_feature(enable = "avx512f")]
38238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38239#[cfg_attr(test, assert_instr(vmulss))]
38240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38241pub const fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38242    unsafe {
38243        let mut add: f32 = 0.;
38244        if (k & 0b00000001) != 0 {
38245            let extracta: f32 = simd_extract!(a, 0);
38246            let extractb: f32 = simd_extract!(b, 0);
38247            add = extracta * extractb;
38248        }
38249        simd_insert!(a, 0, add)
38250    }
38251}
38252
38253/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38254///
38255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
38256#[inline]
38257#[target_feature(enable = "avx512f")]
38258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38259#[cfg_attr(test, assert_instr(vmulsd))]
38260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38261pub const fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38262    unsafe {
38263        let extractsrc: f64 = simd_extract!(src, 0);
38264        let mut add: f64 = extractsrc;
38265        if (k & 0b00000001) != 0 {
38266            let extracta: f64 = simd_extract!(a, 0);
38267            let extractb: f64 = simd_extract!(b, 0);
38268            add = extracta * extractb;
38269        }
38270        simd_insert!(a, 0, add)
38271    }
38272}
38273
38274/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38275///
38276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
38277#[inline]
38278#[target_feature(enable = "avx512f")]
38279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38280#[cfg_attr(test, assert_instr(vmulsd))]
38281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38282pub const fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38283    unsafe {
38284        let mut add: f64 = 0.;
38285        if (k & 0b00000001) != 0 {
38286            let extracta: f64 = simd_extract!(a, 0);
38287            let extractb: f64 = simd_extract!(b, 0);
38288            add = extracta * extractb;
38289        }
38290        simd_insert!(a, 0, add)
38291    }
38292}
38293
38294/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38295///
38296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
38297#[inline]
38298#[target_feature(enable = "avx512f")]
38299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38300#[cfg_attr(test, assert_instr(vdivss))]
38301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38302pub const fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38303    unsafe {
38304        let extractsrc: f32 = simd_extract!(src, 0);
38305        let mut add: f32 = extractsrc;
38306        if (k & 0b00000001) != 0 {
38307            let extracta: f32 = simd_extract!(a, 0);
38308            let extractb: f32 = simd_extract!(b, 0);
38309            add = extracta / extractb;
38310        }
38311        simd_insert!(a, 0, add)
38312    }
38313}
38314
38315/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38316///
38317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
38318#[inline]
38319#[target_feature(enable = "avx512f")]
38320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38321#[cfg_attr(test, assert_instr(vdivss))]
38322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38323pub const fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38324    unsafe {
38325        let mut add: f32 = 0.;
38326        if (k & 0b00000001) != 0 {
38327            let extracta: f32 = simd_extract!(a, 0);
38328            let extractb: f32 = simd_extract!(b, 0);
38329            add = extracta / extractb;
38330        }
38331        simd_insert!(a, 0, add)
38332    }
38333}
38334
38335/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38336///
38337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
38338#[inline]
38339#[target_feature(enable = "avx512f")]
38340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38341#[cfg_attr(test, assert_instr(vdivsd))]
38342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38343pub const fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38344    unsafe {
38345        let extractsrc: f64 = simd_extract!(src, 0);
38346        let mut add: f64 = extractsrc;
38347        if (k & 0b00000001) != 0 {
38348            let extracta: f64 = simd_extract!(a, 0);
38349            let extractb: f64 = simd_extract!(b, 0);
38350            add = extracta / extractb;
38351        }
38352        simd_insert!(a, 0, add)
38353    }
38354}
38355
38356/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38357///
38358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
38359#[inline]
38360#[target_feature(enable = "avx512f")]
38361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38362#[cfg_attr(test, assert_instr(vdivsd))]
38363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38364pub const fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38365    unsafe {
38366        let mut add: f64 = 0.;
38367        if (k & 0b00000001) != 0 {
38368            let extracta: f64 = simd_extract!(a, 0);
38369            let extractb: f64 = simd_extract!(b, 0);
38370            add = extracta / extractb;
38371        }
38372        simd_insert!(a, 0, add)
38373    }
38374}
38375
38376/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38382#[cfg_attr(test, assert_instr(vmaxss))]
38383pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38384    unsafe {
38385        transmute(vmaxss(
38386            a.as_f32x4(),
38387            b.as_f32x4(),
38388            src.as_f32x4(),
38389            k,
38390            _MM_FROUND_CUR_DIRECTION,
38391        ))
38392    }
38393}
38394
38395/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38396///
38397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
38398#[inline]
38399#[target_feature(enable = "avx512f")]
38400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38401#[cfg_attr(test, assert_instr(vmaxss))]
38402pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38403    unsafe {
38404        transmute(vmaxss(
38405            a.as_f32x4(),
38406            b.as_f32x4(),
38407            f32x4::ZERO,
38408            k,
38409            _MM_FROUND_CUR_DIRECTION,
38410        ))
38411    }
38412}
38413
38414/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38415///
38416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
38417#[inline]
38418#[target_feature(enable = "avx512f")]
38419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38420#[cfg_attr(test, assert_instr(vmaxsd))]
38421pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38422    unsafe {
38423        transmute(vmaxsd(
38424            a.as_f64x2(),
38425            b.as_f64x2(),
38426            src.as_f64x2(),
38427            k,
38428            _MM_FROUND_CUR_DIRECTION,
38429        ))
38430    }
38431}
38432
38433/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38434///
38435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
38436#[inline]
38437#[target_feature(enable = "avx512f")]
38438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38439#[cfg_attr(test, assert_instr(vmaxsd))]
38440pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38441    unsafe {
38442        transmute(vmaxsd(
38443            a.as_f64x2(),
38444            b.as_f64x2(),
38445            f64x2::ZERO,
38446            k,
38447            _MM_FROUND_CUR_DIRECTION,
38448        ))
38449    }
38450}
38451
38452/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38453///
38454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
38455#[inline]
38456#[target_feature(enable = "avx512f")]
38457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38458#[cfg_attr(test, assert_instr(vminss))]
38459pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38460    unsafe {
38461        transmute(vminss(
38462            a.as_f32x4(),
38463            b.as_f32x4(),
38464            src.as_f32x4(),
38465            k,
38466            _MM_FROUND_CUR_DIRECTION,
38467        ))
38468    }
38469}
38470
38471/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38472///
38473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
38474#[inline]
38475#[target_feature(enable = "avx512f")]
38476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38477#[cfg_attr(test, assert_instr(vminss))]
38478pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38479    unsafe {
38480        transmute(vminss(
38481            a.as_f32x4(),
38482            b.as_f32x4(),
38483            f32x4::ZERO,
38484            k,
38485            _MM_FROUND_CUR_DIRECTION,
38486        ))
38487    }
38488}
38489
38490/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38491///
38492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
38493#[inline]
38494#[target_feature(enable = "avx512f")]
38495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38496#[cfg_attr(test, assert_instr(vminsd))]
38497pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38498    unsafe {
38499        transmute(vminsd(
38500            a.as_f64x2(),
38501            b.as_f64x2(),
38502            src.as_f64x2(),
38503            k,
38504            _MM_FROUND_CUR_DIRECTION,
38505        ))
38506    }
38507}
38508
38509/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38510///
38511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
38512#[inline]
38513#[target_feature(enable = "avx512f")]
38514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38515#[cfg_attr(test, assert_instr(vminsd))]
38516pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38517    unsafe {
38518        transmute(vminsd(
38519            a.as_f64x2(),
38520            b.as_f64x2(),
38521            f64x2::ZERO,
38522            k,
38523            _MM_FROUND_CUR_DIRECTION,
38524        ))
38525    }
38526}
38527
38528/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38529///
38530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
38531#[inline]
38532#[target_feature(enable = "avx512f")]
38533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38534#[cfg_attr(test, assert_instr(vsqrtss))]
38535pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38536    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
38537}
38538
38539/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38540///
38541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
38542#[inline]
38543#[target_feature(enable = "avx512f")]
38544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38545#[cfg_attr(test, assert_instr(vsqrtss))]
38546pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
38548}
38549
38550/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38551///
38552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
38553#[inline]
38554#[target_feature(enable = "avx512f")]
38555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38556#[cfg_attr(test, assert_instr(vsqrtsd))]
38557pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38558    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
38559}
38560
38561/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38562///
38563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
38564#[inline]
38565#[target_feature(enable = "avx512f")]
38566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38567#[cfg_attr(test, assert_instr(vsqrtsd))]
38568pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38569    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
38570}
38571
38572/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38573///
38574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
38575#[inline]
38576#[target_feature(enable = "avx512f")]
38577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38578#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38579pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
38580    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
38581}
38582
38583/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38584///
38585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
38586#[inline]
38587#[target_feature(enable = "avx512f")]
38588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38589#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38590pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38591    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
38592}
38593
38594/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38595///
38596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
38597#[inline]
38598#[target_feature(enable = "avx512f")]
38599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38600#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38601pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38602    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
38603}
38604
38605/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38606///
38607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
38608#[inline]
38609#[target_feature(enable = "avx512f")]
38610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38611#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38612pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
38613    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
38614}
38615
38616/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38617///
38618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
38619#[inline]
38620#[target_feature(enable = "avx512f")]
38621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38622#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38623pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38624    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
38625}
38626
38627/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38628///
38629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
38630#[inline]
38631#[target_feature(enable = "avx512f")]
38632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38633#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38634pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38635    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
38636}
38637
38638/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38644#[cfg_attr(test, assert_instr(vrcp14ss))]
38645pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
38646    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
38647}
38648
38649/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38650///
38651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
38652#[inline]
38653#[target_feature(enable = "avx512f")]
38654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38655#[cfg_attr(test, assert_instr(vrcp14ss))]
38656pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38657    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
38658}
38659
38660/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38661///
38662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
38663#[inline]
38664#[target_feature(enable = "avx512f")]
38665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38666#[cfg_attr(test, assert_instr(vrcp14ss))]
38667pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38668    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
38669}
38670
38671/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38672///
38673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
38674#[inline]
38675#[target_feature(enable = "avx512f")]
38676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38677#[cfg_attr(test, assert_instr(vrcp14sd))]
38678pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
38679    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
38680}
38681
38682/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38688#[cfg_attr(test, assert_instr(vrcp14sd))]
38689pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38690    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
38691}
38692
38693/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38694///
38695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
38696#[inline]
38697#[target_feature(enable = "avx512f")]
38698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38699#[cfg_attr(test, assert_instr(vrcp14sd))]
38700pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38701    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
38702}
38703
38704/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38705///
38706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
38707#[inline]
38708#[target_feature(enable = "avx512f")]
38709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38710#[cfg_attr(test, assert_instr(vgetexpss))]
38711pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
38712    unsafe {
38713        transmute(vgetexpss(
38714            a.as_f32x4(),
38715            b.as_f32x4(),
38716            f32x4::ZERO,
38717            0b1,
38718            _MM_FROUND_NO_EXC,
38719        ))
38720    }
38721}
38722
38723/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38724///
38725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
38726#[inline]
38727#[target_feature(enable = "avx512f")]
38728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38729#[cfg_attr(test, assert_instr(vgetexpss))]
38730pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38731    unsafe {
38732        transmute(vgetexpss(
38733            a.as_f32x4(),
38734            b.as_f32x4(),
38735            src.as_f32x4(),
38736            k,
38737            _MM_FROUND_NO_EXC,
38738        ))
38739    }
38740}
38741
38742/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38743///
38744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
38745#[inline]
38746#[target_feature(enable = "avx512f")]
38747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38748#[cfg_attr(test, assert_instr(vgetexpss))]
38749pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38750    unsafe {
38751        transmute(vgetexpss(
38752            a.as_f32x4(),
38753            b.as_f32x4(),
38754            f32x4::ZERO,
38755            k,
38756            _MM_FROUND_NO_EXC,
38757        ))
38758    }
38759}
38760
38761/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38762///
38763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
38764#[inline]
38765#[target_feature(enable = "avx512f")]
38766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38767#[cfg_attr(test, assert_instr(vgetexpsd))]
38768pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
38769    unsafe {
38770        transmute(vgetexpsd(
38771            a.as_f64x2(),
38772            b.as_f64x2(),
38773            f64x2::ZERO,
38774            0b1,
38775            _MM_FROUND_NO_EXC,
38776        ))
38777    }
38778}
38779
38780/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38781///
38782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
38783#[inline]
38784#[target_feature(enable = "avx512f")]
38785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38786#[cfg_attr(test, assert_instr(vgetexpsd))]
38787pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38788    unsafe {
38789        transmute(vgetexpsd(
38790            a.as_f64x2(),
38791            b.as_f64x2(),
38792            src.as_f64x2(),
38793            k,
38794            _MM_FROUND_NO_EXC,
38795        ))
38796    }
38797}
38798
38799/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38800///
38801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
38802#[inline]
38803#[target_feature(enable = "avx512f")]
38804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38805#[cfg_attr(test, assert_instr(vgetexpsd))]
38806pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38807    unsafe {
38808        transmute(vgetexpsd(
38809            a.as_f64x2(),
38810            b.as_f64x2(),
38811            f64x2::ZERO,
38812            k,
38813            _MM_FROUND_NO_EXC,
38814        ))
38815    }
38816}
38817
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        // Compile-time validation of the const controls (NORM fits in 4 bits, SIGN in 2).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(
            a,
            b,
            // Instruction immediate: sign control in bits 3:2, interval selector in bits 1:0.
            SIGN << 2 | NORM,
            // Unmasked form: zero merge source with only mask bit 0 set.
            f32x4::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
38856
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_getmant_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        // Compile-time validation of the const controls (NORM fits in 4 bits, SIGN in 2).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        // Immediate: SIGN in bits 3:2, NORM in bits 1:0; src supplies lane 0 when mask bit 0 is clear.
        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
38894
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_getmant_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        // Compile-time validation of the const controls (NORM fits in 4 bits, SIGN in 2).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(
            a,
            b,
            // Instruction immediate: sign control in bits 3:2, interval selector in bits 1:0.
            SIGN << 2 | NORM,
            // Zeroing form: the zero vector is merged in when mask bit 0 is clear.
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
38937
/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        // Compile-time validation of the const controls (NORM fits in 4 bits, SIGN in 2).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(
            a,
            b,
            // Instruction immediate: sign control in bits 3:2, interval selector in bits 1:0.
            SIGN << 2 | NORM,
            // Unmasked form: zero merge source with only mask bit 0 set.
            f64x2::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
38976
/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_getmant_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        // Compile-time validation of the const controls (NORM fits in 4 bits, SIGN in 2).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        // Immediate: SIGN in bits 3:2, NORM in bits 1:0; src supplies lane 0 when mask bit 0 is clear.
        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
39014
/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_getmant_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        // Compile-time validation of the const controls (NORM fits in 4 bits, SIGN in 2).
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(
            a,
            b,
            // Instruction immediate: sign control in bits 3:2, interval selector in bits 1:0.
            SIGN << 2 | NORM,
            // Zeroing form: the zero vector is merged in when mask bit 0 is clear.
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
39057
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        // IMM8 must fit the instruction's 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(
            a,
            b,
            // Unmasked form: zero merge source with an all-ones mask (only bit 0 is
            // relevant for the scalar operation).
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
39088
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        // IMM8 must fit the instruction's 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        // src supplies lane 0 when mask bit 0 is clear.
        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
39118
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        // IMM8 must fit the instruction's 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        // Zeroing form: the zero vector is merged in when mask bit 0 is clear.
        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
39142
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // IMM8 must fit the instruction's 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(
            a,
            b,
            // Unmasked form: zero merge source with an all-ones mask (only bit 0 is
            // relevant for the scalar operation).
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
39173
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        // IMM8 must fit the instruction's 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        // src supplies lane 0 when mask bit 0 is clear.
        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
39203
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // IMM8 must fit the instruction's 8-bit immediate.
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        // Zeroing form: the zero vector is merged in when mask bit 0 is clear.
        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
39227
39228/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39229///
39230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
39231#[inline]
39232#[target_feature(enable = "avx512f")]
39233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39234#[cfg_attr(test, assert_instr(vscalefss))]
39235pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
39236    unsafe {
39237        let a = a.as_f32x4();
39238        let b = b.as_f32x4();
39239        transmute(vscalefss(
39240            a,
39241            b,
39242            f32x4::ZERO,
39243            0b11111111,
39244            _MM_FROUND_CUR_DIRECTION,
39245        ))
39246    }
39247}
39248
39249/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39250///
39251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
39252#[inline]
39253#[target_feature(enable = "avx512f")]
39254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39255#[cfg_attr(test, assert_instr(vscalefss))]
39256pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
39257    unsafe {
39258        let a = a.as_f32x4();
39259        let b = b.as_f32x4();
39260        let src = src.as_f32x4();
39261        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
39262    }
39263}
39264
39265/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39266///
39267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
39268#[inline]
39269#[target_feature(enable = "avx512f")]
39270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39271#[cfg_attr(test, assert_instr(vscalefss))]
39272pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39273    unsafe {
39274        transmute(vscalefss(
39275            a.as_f32x4(),
39276            b.as_f32x4(),
39277            f32x4::ZERO,
39278            k,
39279            _MM_FROUND_CUR_DIRECTION,
39280        ))
39281    }
39282}
39283
39284/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39285///
39286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
39287#[inline]
39288#[target_feature(enable = "avx512f")]
39289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39290#[cfg_attr(test, assert_instr(vscalefsd))]
39291pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
39292    unsafe {
39293        transmute(vscalefsd(
39294            a.as_f64x2(),
39295            b.as_f64x2(),
39296            f64x2::ZERO,
39297            0b11111111,
39298            _MM_FROUND_CUR_DIRECTION,
39299        ))
39300    }
39301}
39302
39303/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39304///
39305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
39306#[inline]
39307#[target_feature(enable = "avx512f")]
39308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39309#[cfg_attr(test, assert_instr(vscalefsd))]
39310pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39311    unsafe {
39312        transmute(vscalefsd(
39313            a.as_f64x2(),
39314            b.as_f64x2(),
39315            src.as_f64x2(),
39316            k,
39317            _MM_FROUND_CUR_DIRECTION,
39318        ))
39319    }
39320}
39321
39322/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39323///
39324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
39325#[inline]
39326#[target_feature(enable = "avx512f")]
39327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39328#[cfg_attr(test, assert_instr(vscalefsd))]
39329pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39330    unsafe {
39331        transmute(vscalefsd(
39332            a.as_f64x2(),
39333            b.as_f64x2(),
39334            f64x2::ZERO,
39335            k,
39336            _MM_FROUND_CUR_DIRECTION,
39337        ))
39338    }
39339}
39340
39341/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39342///
39343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
39344#[inline]
39345#[target_feature(enable = "avx512f")]
39346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39347#[cfg_attr(test, assert_instr(vfmadd))]
39348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39349pub const fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39350    unsafe {
39351        let mut fmadd: f32 = simd_extract!(a, 0);
39352        if (k & 0b00000001) != 0 {
39353            let extractb: f32 = simd_extract!(b, 0);
39354            let extractc: f32 = simd_extract!(c, 0);
39355            fmadd = fmaf32(fmadd, extractb, extractc);
39356        }
39357        simd_insert!(a, 0, fmadd)
39358    }
39359}
39360
39361/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39362///
39363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
39364#[inline]
39365#[target_feature(enable = "avx512f")]
39366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39367#[cfg_attr(test, assert_instr(vfmadd))]
39368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39369pub const fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39370    unsafe {
39371        let mut fmadd: f32 = 0.;
39372        if (k & 0b00000001) != 0 {
39373            let extracta: f32 = simd_extract!(a, 0);
39374            let extractb: f32 = simd_extract!(b, 0);
39375            let extractc: f32 = simd_extract!(c, 0);
39376            fmadd = fmaf32(extracta, extractb, extractc);
39377        }
39378        simd_insert!(a, 0, fmadd)
39379    }
39380}
39381
39382/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39383///
39384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
39385#[inline]
39386#[target_feature(enable = "avx512f")]
39387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39388#[cfg_attr(test, assert_instr(vfmadd))]
39389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39390pub const fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39391    unsafe {
39392        let mut fmadd: f32 = simd_extract!(c, 0);
39393        if (k & 0b00000001) != 0 {
39394            let extracta: f32 = simd_extract!(a, 0);
39395            let extractb: f32 = simd_extract!(b, 0);
39396            fmadd = fmaf32(extracta, extractb, fmadd);
39397        }
39398        simd_insert!(c, 0, fmadd)
39399    }
39400}
39401
39402/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39403///
39404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
39405#[inline]
39406#[target_feature(enable = "avx512f")]
39407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39408#[cfg_attr(test, assert_instr(vfmadd))]
39409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39410pub const fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39411    unsafe {
39412        let mut fmadd: f64 = simd_extract!(a, 0);
39413        if (k & 0b00000001) != 0 {
39414            let extractb: f64 = simd_extract!(b, 0);
39415            let extractc: f64 = simd_extract!(c, 0);
39416            fmadd = fmaf64(fmadd, extractb, extractc);
39417        }
39418        simd_insert!(a, 0, fmadd)
39419    }
39420}
39421
39422/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39423///
39424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
39425#[inline]
39426#[target_feature(enable = "avx512f")]
39427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39428#[cfg_attr(test, assert_instr(vfmadd))]
39429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39430pub const fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39431    unsafe {
39432        let mut fmadd: f64 = 0.;
39433        if (k & 0b00000001) != 0 {
39434            let extracta: f64 = simd_extract!(a, 0);
39435            let extractb: f64 = simd_extract!(b, 0);
39436            let extractc: f64 = simd_extract!(c, 0);
39437            fmadd = fmaf64(extracta, extractb, extractc);
39438        }
39439        simd_insert!(a, 0, fmadd)
39440    }
39441}
39442
39443/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39444///
39445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
39446#[inline]
39447#[target_feature(enable = "avx512f")]
39448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39449#[cfg_attr(test, assert_instr(vfmadd))]
39450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39451pub const fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39452    unsafe {
39453        let mut fmadd: f64 = simd_extract!(c, 0);
39454        if (k & 0b00000001) != 0 {
39455            let extracta: f64 = simd_extract!(a, 0);
39456            let extractb: f64 = simd_extract!(b, 0);
39457            fmadd = fmaf64(extracta, extractb, fmadd);
39458        }
39459        simd_insert!(c, 0, fmadd)
39460    }
39461}
39462
39463/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39464///
39465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
39466#[inline]
39467#[target_feature(enable = "avx512f")]
39468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39469#[cfg_attr(test, assert_instr(vfmsub))]
39470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39471pub const fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39472    unsafe {
39473        let mut fmsub: f32 = simd_extract!(a, 0);
39474        if (k & 0b00000001) != 0 {
39475            let extractb: f32 = simd_extract!(b, 0);
39476            let extractc: f32 = simd_extract!(c, 0);
39477            let extractc = -extractc;
39478            fmsub = fmaf32(fmsub, extractb, extractc);
39479        }
39480        simd_insert!(a, 0, fmsub)
39481    }
39482}
39483
39484/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39485///
39486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
39487#[inline]
39488#[target_feature(enable = "avx512f")]
39489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39490#[cfg_attr(test, assert_instr(vfmsub))]
39491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39492pub const fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39493    unsafe {
39494        let mut fmsub: f32 = 0.;
39495        if (k & 0b00000001) != 0 {
39496            let extracta: f32 = simd_extract!(a, 0);
39497            let extractb: f32 = simd_extract!(b, 0);
39498            let extractc: f32 = simd_extract!(c, 0);
39499            let extractc = -extractc;
39500            fmsub = fmaf32(extracta, extractb, extractc);
39501        }
39502        simd_insert!(a, 0, fmsub)
39503    }
39504}
39505
39506/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39507///
39508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
39509#[inline]
39510#[target_feature(enable = "avx512f")]
39511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39512#[cfg_attr(test, assert_instr(vfmsub))]
39513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39514pub const fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39515    unsafe {
39516        let mut fmsub: f32 = simd_extract!(c, 0);
39517        if (k & 0b00000001) != 0 {
39518            let extracta: f32 = simd_extract!(a, 0);
39519            let extractb: f32 = simd_extract!(b, 0);
39520            let extractc = -fmsub;
39521            fmsub = fmaf32(extracta, extractb, extractc);
39522        }
39523        simd_insert!(c, 0, fmsub)
39524    }
39525}
39526
39527/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39528///
39529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
39530#[inline]
39531#[target_feature(enable = "avx512f")]
39532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39533#[cfg_attr(test, assert_instr(vfmsub))]
39534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39535pub const fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39536    unsafe {
39537        let mut fmsub: f64 = simd_extract!(a, 0);
39538        if (k & 0b00000001) != 0 {
39539            let extractb: f64 = simd_extract!(b, 0);
39540            let extractc: f64 = simd_extract!(c, 0);
39541            let extractc = -extractc;
39542            fmsub = fmaf64(fmsub, extractb, extractc);
39543        }
39544        simd_insert!(a, 0, fmsub)
39545    }
39546}
39547
39548/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39549///
39550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
39551#[inline]
39552#[target_feature(enable = "avx512f")]
39553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39554#[cfg_attr(test, assert_instr(vfmsub))]
39555#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39556pub const fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39557    unsafe {
39558        let mut fmsub: f64 = 0.;
39559        if (k & 0b00000001) != 0 {
39560            let extracta: f64 = simd_extract!(a, 0);
39561            let extractb: f64 = simd_extract!(b, 0);
39562            let extractc: f64 = simd_extract!(c, 0);
39563            let extractc = -extractc;
39564            fmsub = fmaf64(extracta, extractb, extractc);
39565        }
39566        simd_insert!(a, 0, fmsub)
39567    }
39568}
39569
39570/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39571///
39572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
39573#[inline]
39574#[target_feature(enable = "avx512f")]
39575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39576#[cfg_attr(test, assert_instr(vfmsub))]
39577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39578pub const fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39579    unsafe {
39580        let mut fmsub: f64 = simd_extract!(c, 0);
39581        if (k & 0b00000001) != 0 {
39582            let extracta: f64 = simd_extract!(a, 0);
39583            let extractb: f64 = simd_extract!(b, 0);
39584            let extractc = -fmsub;
39585            fmsub = fmaf64(extracta, extractb, extractc);
39586        }
39587        simd_insert!(c, 0, fmsub)
39588    }
39589}
39590
39591/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39592///
39593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
39594#[inline]
39595#[target_feature(enable = "avx512f")]
39596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39597#[cfg_attr(test, assert_instr(vfnmadd))]
39598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39599pub const fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39600    unsafe {
39601        let mut fnmadd: f32 = simd_extract!(a, 0);
39602        if (k & 0b00000001) != 0 {
39603            let extracta = -fnmadd;
39604            let extractb: f32 = simd_extract!(b, 0);
39605            let extractc: f32 = simd_extract!(c, 0);
39606            fnmadd = fmaf32(extracta, extractb, extractc);
39607        }
39608        simd_insert!(a, 0, fnmadd)
39609    }
39610}
39611
39612/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39613///
39614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
39615#[inline]
39616#[target_feature(enable = "avx512f")]
39617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39618#[cfg_attr(test, assert_instr(vfnmadd))]
39619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39620pub const fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39621    unsafe {
39622        let mut fnmadd: f32 = 0.;
39623        if (k & 0b00000001) != 0 {
39624            let extracta: f32 = simd_extract!(a, 0);
39625            let extracta = -extracta;
39626            let extractb: f32 = simd_extract!(b, 0);
39627            let extractc: f32 = simd_extract!(c, 0);
39628            fnmadd = fmaf32(extracta, extractb, extractc);
39629        }
39630        simd_insert!(a, 0, fnmadd)
39631    }
39632}
39633
39634/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39635///
39636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
39637#[inline]
39638#[target_feature(enable = "avx512f")]
39639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39640#[cfg_attr(test, assert_instr(vfnmadd))]
39641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39642pub const fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39643    unsafe {
39644        let mut fnmadd: f32 = simd_extract!(c, 0);
39645        if (k & 0b00000001) != 0 {
39646            let extracta: f32 = simd_extract!(a, 0);
39647            let extracta = -extracta;
39648            let extractb: f32 = simd_extract!(b, 0);
39649            fnmadd = fmaf32(extracta, extractb, fnmadd);
39650        }
39651        simd_insert!(c, 0, fnmadd)
39652    }
39653}
39654
39655/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39661#[cfg_attr(test, assert_instr(vfnmadd))]
39662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39663pub const fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39664    unsafe {
39665        let mut fnmadd: f64 = simd_extract!(a, 0);
39666        if (k & 0b00000001) != 0 {
39667            let extracta = -fnmadd;
39668            let extractb: f64 = simd_extract!(b, 0);
39669            let extractc: f64 = simd_extract!(c, 0);
39670            fnmadd = fmaf64(extracta, extractb, extractc);
39671        }
39672        simd_insert!(a, 0, fnmadd)
39673    }
39674}
39675
39676/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39677///
39678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
39679#[inline]
39680#[target_feature(enable = "avx512f")]
39681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39682#[cfg_attr(test, assert_instr(vfnmadd))]
39683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39684pub const fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39685    unsafe {
39686        let mut fnmadd: f64 = 0.;
39687        if (k & 0b00000001) != 0 {
39688            let extracta: f64 = simd_extract!(a, 0);
39689            let extracta = -extracta;
39690            let extractb: f64 = simd_extract!(b, 0);
39691            let extractc: f64 = simd_extract!(c, 0);
39692            fnmadd = fmaf64(extracta, extractb, extractc);
39693        }
39694        simd_insert!(a, 0, fnmadd)
39695    }
39696}
39697
39698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39699///
39700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
39701#[inline]
39702#[target_feature(enable = "avx512f")]
39703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39704#[cfg_attr(test, assert_instr(vfnmadd))]
39705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39706pub const fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39707    unsafe {
39708        let mut fnmadd: f64 = simd_extract!(c, 0);
39709        if (k & 0b00000001) != 0 {
39710            let extracta: f64 = simd_extract!(a, 0);
39711            let extracta = -extracta;
39712            let extractb: f64 = simd_extract!(b, 0);
39713            fnmadd = fmaf64(extracta, extractb, fnmadd);
39714        }
39715        simd_insert!(c, 0, fnmadd)
39716    }
39717}
39718
39719/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39720///
39721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
39722#[inline]
39723#[target_feature(enable = "avx512f")]
39724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39725#[cfg_attr(test, assert_instr(vfnmsub))]
39726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39727pub const fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39728    unsafe {
39729        let mut fnmsub: f32 = simd_extract!(a, 0);
39730        if (k & 0b00000001) != 0 {
39731            let extracta = -fnmsub;
39732            let extractb: f32 = simd_extract!(b, 0);
39733            let extractc: f32 = simd_extract!(c, 0);
39734            let extractc = -extractc;
39735            fnmsub = fmaf32(extracta, extractb, extractc);
39736        }
39737        simd_insert!(a, 0, fnmsub)
39738    }
39739}
39740
39741/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39742///
39743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
39744#[inline]
39745#[target_feature(enable = "avx512f")]
39746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39747#[cfg_attr(test, assert_instr(vfnmsub))]
39748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39749pub const fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39750    unsafe {
39751        let mut fnmsub: f32 = 0.;
39752        if (k & 0b00000001) != 0 {
39753            let extracta: f32 = simd_extract!(a, 0);
39754            let extracta = -extracta;
39755            let extractb: f32 = simd_extract!(b, 0);
39756            let extractc: f32 = simd_extract!(c, 0);
39757            let extractc = -extractc;
39758            fnmsub = fmaf32(extracta, extractb, extractc);
39759        }
39760        simd_insert!(a, 0, fnmsub)
39761    }
39762}
39763
39764/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39765///
39766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
39767#[inline]
39768#[target_feature(enable = "avx512f")]
39769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39770#[cfg_attr(test, assert_instr(vfnmsub))]
39771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39772pub const fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39773    unsafe {
39774        let mut fnmsub: f32 = simd_extract!(c, 0);
39775        if (k & 0b00000001) != 0 {
39776            let extracta: f32 = simd_extract!(a, 0);
39777            let extracta = -extracta;
39778            let extractb: f32 = simd_extract!(b, 0);
39779            let extractc = -fnmsub;
39780            fnmsub = fmaf32(extracta, extractb, extractc);
39781        }
39782        simd_insert!(c, 0, fnmsub)
39783    }
39784}
39785
39786/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39787///
39788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
39789#[inline]
39790#[target_feature(enable = "avx512f")]
39791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39792#[cfg_attr(test, assert_instr(vfnmsub))]
39793#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39794pub const fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39795    unsafe {
39796        let mut fnmsub: f64 = simd_extract!(a, 0);
39797        if (k & 0b00000001) != 0 {
39798            let extracta = -fnmsub;
39799            let extractb: f64 = simd_extract!(b, 0);
39800            let extractc: f64 = simd_extract!(c, 0);
39801            let extractc = -extractc;
39802            fnmsub = fmaf64(extracta, extractb, extractc);
39803        }
39804        simd_insert!(a, 0, fnmsub)
39805    }
39806}
39807
39808/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39809///
39810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
39811#[inline]
39812#[target_feature(enable = "avx512f")]
39813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39814#[cfg_attr(test, assert_instr(vfnmsub))]
39815#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39816pub const fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39817    unsafe {
39818        let mut fnmsub: f64 = 0.;
39819        if (k & 0b00000001) != 0 {
39820            let extracta: f64 = simd_extract!(a, 0);
39821            let extracta = -extracta;
39822            let extractb: f64 = simd_extract!(b, 0);
39823            let extractc: f64 = simd_extract!(c, 0);
39824            let extractc = -extractc;
39825            fnmsub = fmaf64(extracta, extractb, extractc);
39826        }
39827        simd_insert!(a, 0, fnmsub)
39828    }
39829}
39830
39831/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39832///
39833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
39834#[inline]
39835#[target_feature(enable = "avx512f")]
39836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39837#[cfg_attr(test, assert_instr(vfnmsub))]
39838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39839pub const fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39840    unsafe {
39841        let mut fnmsub: f64 = simd_extract!(c, 0);
39842        if (k & 0b00000001) != 0 {
39843            let extracta: f64 = simd_extract!(a, 0);
39844            let extracta = -extracta;
39845            let extractb: f64 = simd_extract!(b, 0);
39846            let extractc = -fnmsub;
39847            fnmsub = fmaf64(extracta, extractb, extractc);
39848        }
39849        simd_insert!(c, 0, fnmsub)
39850    }
39851}
39852
39853/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39854///
39855/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39856/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39857/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39858/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39859/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39860/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39861///
39862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
39863#[inline]
39864#[target_feature(enable = "avx512f")]
39865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39866#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39867#[rustc_legacy_const_generics(2)]
39868pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39869    unsafe {
39870        static_assert_rounding!(ROUNDING);
39871        let a = a.as_f32x4();
39872        let b = b.as_f32x4();
39873        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
39874        transmute(r)
39875    }
39876}
39877
39878/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39879///
39880/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39881/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39882/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39883/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39884/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39885/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39886///
39887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
39888#[inline]
39889#[target_feature(enable = "avx512f")]
39890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39891#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39892#[rustc_legacy_const_generics(4)]
39893pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
39894    src: __m128,
39895    k: __mmask8,
39896    a: __m128,
39897    b: __m128,
39898) -> __m128 {
39899    unsafe {
39900        static_assert_rounding!(ROUNDING);
39901        let a = a.as_f32x4();
39902        let b = b.as_f32x4();
39903        let src = src.as_f32x4();
39904        let r = vaddss(a, b, src, k, ROUNDING);
39905        transmute(r)
39906    }
39907}
39908
39909/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39910///
39911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39917///
39918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
39919#[inline]
39920#[target_feature(enable = "avx512f")]
39921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39922#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39923#[rustc_legacy_const_generics(3)]
39924pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39925    unsafe {
39926        static_assert_rounding!(ROUNDING);
39927        let a = a.as_f32x4();
39928        let b = b.as_f32x4();
39929        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
39930        transmute(r)
39931    }
39932}
39933
39934/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39935///
39936/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39937/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39938/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39939/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39940/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39942///
39943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
39944#[inline]
39945#[target_feature(enable = "avx512f")]
39946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39947#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
39948#[rustc_legacy_const_generics(2)]
39949pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39950    unsafe {
39951        static_assert_rounding!(ROUNDING);
39952        let a = a.as_f64x2();
39953        let b = b.as_f64x2();
39954        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
39955        transmute(r)
39956    }
39957}
39958
39959/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39960///
39961/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39962/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39963/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39964/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39965/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39966/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39967///
39968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
39969#[inline]
39970#[target_feature(enable = "avx512f")]
39971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39972#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
39973#[rustc_legacy_const_generics(4)]
39974pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
39975    src: __m128d,
39976    k: __mmask8,
39977    a: __m128d,
39978    b: __m128d,
39979) -> __m128d {
39980    unsafe {
39981        static_assert_rounding!(ROUNDING);
39982        let a = a.as_f64x2();
39983        let b = b.as_f64x2();
39984        let src = src.as_f64x2();
39985        let r = vaddsd(a, b, src, k, ROUNDING);
39986        transmute(r)
39987    }
39988}
39989
39990/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39991///
39992/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39993/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39994/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39995/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39996/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39997/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39998///
39999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
40000#[inline]
40001#[target_feature(enable = "avx512f")]
40002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40003#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
40004#[rustc_legacy_const_generics(3)]
40005pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40006    unsafe {
40007        static_assert_rounding!(ROUNDING);
40008        let a = a.as_f64x2();
40009        let b = b.as_f64x2();
40010        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
40011        transmute(r)
40012    }
40013}
40014
40015/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40016///
40017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40023///
40024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
40025#[inline]
40026#[target_feature(enable = "avx512f")]
40027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40028#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40029#[rustc_legacy_const_generics(2)]
40030pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40031    unsafe {
40032        static_assert_rounding!(ROUNDING);
40033        let a = a.as_f32x4();
40034        let b = b.as_f32x4();
40035        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
40036        transmute(r)
40037    }
40038}
40039
40040/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40041///
40042/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40048///
40049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
40050#[inline]
40051#[target_feature(enable = "avx512f")]
40052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40053#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40054#[rustc_legacy_const_generics(4)]
40055pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
40056    src: __m128,
40057    k: __mmask8,
40058    a: __m128,
40059    b: __m128,
40060) -> __m128 {
40061    unsafe {
40062        static_assert_rounding!(ROUNDING);
40063        let a = a.as_f32x4();
40064        let b = b.as_f32x4();
40065        let src = src.as_f32x4();
40066        let r = vsubss(a, b, src, k, ROUNDING);
40067        transmute(r)
40068    }
40069}
40070
40071/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40072///
40073/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40074/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40075/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40076/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40077/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40079///
40080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
40081#[inline]
40082#[target_feature(enable = "avx512f")]
40083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40084#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40085#[rustc_legacy_const_generics(3)]
40086pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40087    unsafe {
40088        static_assert_rounding!(ROUNDING);
40089        let a = a.as_f32x4();
40090        let b = b.as_f32x4();
40091        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
40092        transmute(r)
40093    }
40094}
40095
40096/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40097///
40098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40104///
40105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
40106#[inline]
40107#[target_feature(enable = "avx512f")]
40108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40109#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40110#[rustc_legacy_const_generics(2)]
40111pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40112    unsafe {
40113        static_assert_rounding!(ROUNDING);
40114        let a = a.as_f64x2();
40115        let b = b.as_f64x2();
40116        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
40117        transmute(r)
40118    }
40119}
40120
40121/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40122///
40123/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40124/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40125/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40126/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40127/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40128/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40129///
40130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
40131#[inline]
40132#[target_feature(enable = "avx512f")]
40133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40134#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40135#[rustc_legacy_const_generics(4)]
40136pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
40137    src: __m128d,
40138    k: __mmask8,
40139    a: __m128d,
40140    b: __m128d,
40141) -> __m128d {
40142    unsafe {
40143        static_assert_rounding!(ROUNDING);
40144        let a = a.as_f64x2();
40145        let b = b.as_f64x2();
40146        let src = src.as_f64x2();
40147        let r = vsubsd(a, b, src, k, ROUNDING);
40148        transmute(r)
40149    }
40150}
40151
40152/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40153///
40154/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40155/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40156/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40157/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40158/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40160///
40161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
40162#[inline]
40163#[target_feature(enable = "avx512f")]
40164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40165#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40166#[rustc_legacy_const_generics(3)]
40167pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40168    unsafe {
40169        static_assert_rounding!(ROUNDING);
40170        let a = a.as_f64x2();
40171        let b = b.as_f64x2();
40172        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
40173        transmute(r)
40174    }
40175}
40176
40177/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40178///
40179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40185///
40186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
40187#[inline]
40188#[target_feature(enable = "avx512f")]
40189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40190#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40191#[rustc_legacy_const_generics(2)]
40192pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40193    unsafe {
40194        static_assert_rounding!(ROUNDING);
40195        let a = a.as_f32x4();
40196        let b = b.as_f32x4();
40197        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
40198        transmute(r)
40199    }
40200}
40201
40202/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40203///
40204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40210///
40211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
40212#[inline]
40213#[target_feature(enable = "avx512f")]
40214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40215#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40216#[rustc_legacy_const_generics(4)]
40217pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
40218    src: __m128,
40219    k: __mmask8,
40220    a: __m128,
40221    b: __m128,
40222) -> __m128 {
40223    unsafe {
40224        static_assert_rounding!(ROUNDING);
40225        let a = a.as_f32x4();
40226        let b = b.as_f32x4();
40227        let src = src.as_f32x4();
40228        let r = vmulss(a, b, src, k, ROUNDING);
40229        transmute(r)
40230    }
40231}
40232
40233/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40234///
40235/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40236/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40237/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40238/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40239/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40240/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40241///
40242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
40243#[inline]
40244#[target_feature(enable = "avx512f")]
40245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40246#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40247#[rustc_legacy_const_generics(3)]
40248pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40249    unsafe {
40250        static_assert_rounding!(ROUNDING);
40251        let a = a.as_f32x4();
40252        let b = b.as_f32x4();
40253        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
40254        transmute(r)
40255    }
40256}
40257
40258/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40259///
40260/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40261/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40262/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40263/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40264/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40265/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40266///
40267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
40268#[inline]
40269#[target_feature(enable = "avx512f")]
40270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40271#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40272#[rustc_legacy_const_generics(2)]
40273pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40274    unsafe {
40275        static_assert_rounding!(ROUNDING);
40276        let a = a.as_f64x2();
40277        let b = b.as_f64x2();
40278        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
40279        transmute(r)
40280    }
40281}
40282
40283/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40284///
40285/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40286/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40287/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40288/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40289/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40291///
40292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
40293#[inline]
40294#[target_feature(enable = "avx512f")]
40295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40296#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40297#[rustc_legacy_const_generics(4)]
40298pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
40299    src: __m128d,
40300    k: __mmask8,
40301    a: __m128d,
40302    b: __m128d,
40303) -> __m128d {
40304    unsafe {
40305        static_assert_rounding!(ROUNDING);
40306        let a = a.as_f64x2();
40307        let b = b.as_f64x2();
40308        let src = src.as_f64x2();
40309        let r = vmulsd(a, b, src, k, ROUNDING);
40310        transmute(r)
40311    }
40312}
40313
40314/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40315///
40316/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40317/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40318/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40319/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40320/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40321/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40322///
40323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
40324#[inline]
40325#[target_feature(enable = "avx512f")]
40326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40327#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40328#[rustc_legacy_const_generics(3)]
40329pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40330    unsafe {
40331        static_assert_rounding!(ROUNDING);
40332        let a = a.as_f64x2();
40333        let b = b.as_f64x2();
40334        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
40335        transmute(r)
40336    }
40337}
40338
40339/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40340///
40341/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40342/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40343/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40344/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40345/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40346/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40347///
40348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
40349#[inline]
40350#[target_feature(enable = "avx512f")]
40351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40352#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40353#[rustc_legacy_const_generics(2)]
40354pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40355    unsafe {
40356        static_assert_rounding!(ROUNDING);
40357        let a = a.as_f32x4();
40358        let b = b.as_f32x4();
40359        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
40360        transmute(r)
40361    }
40362}
40363
40364/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40365///
40366/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40367/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40368/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40369/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40370/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40371/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40372///
40373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
40374#[inline]
40375#[target_feature(enable = "avx512f")]
40376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40377#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40378#[rustc_legacy_const_generics(4)]
40379pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
40380    src: __m128,
40381    k: __mmask8,
40382    a: __m128,
40383    b: __m128,
40384) -> __m128 {
40385    unsafe {
40386        static_assert_rounding!(ROUNDING);
40387        let a = a.as_f32x4();
40388        let b = b.as_f32x4();
40389        let src = src.as_f32x4();
40390        let r = vdivss(a, b, src, k, ROUNDING);
40391        transmute(r)
40392    }
40393}
40394
40395/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40396///
40397/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40398/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40399/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40400/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40401/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40403///
40404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
40405#[inline]
40406#[target_feature(enable = "avx512f")]
40407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40408#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40409#[rustc_legacy_const_generics(3)]
40410pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40411    unsafe {
40412        static_assert_rounding!(ROUNDING);
40413        let a = a.as_f32x4();
40414        let b = b.as_f32x4();
40415        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
40416        transmute(r)
40417    }
40418}
40419
40420/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40421///
40422/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40423/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40424/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40425/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40426/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40427/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40428///
40429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
40430#[inline]
40431#[target_feature(enable = "avx512f")]
40432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40433#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40434#[rustc_legacy_const_generics(2)]
40435pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40436    unsafe {
40437        static_assert_rounding!(ROUNDING);
40438        let a = a.as_f64x2();
40439        let b = b.as_f64x2();
40440        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
40441        transmute(r)
40442    }
40443}
40444
40445/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40446///
40447/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40453///
40454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
40455#[inline]
40456#[target_feature(enable = "avx512f")]
40457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40458#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40459#[rustc_legacy_const_generics(4)]
40460pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
40461    src: __m128d,
40462    k: __mmask8,
40463    a: __m128d,
40464    b: __m128d,
40465) -> __m128d {
40466    unsafe {
40467        static_assert_rounding!(ROUNDING);
40468        let a = a.as_f64x2();
40469        let b = b.as_f64x2();
40470        let src = src.as_f64x2();
40471        let r = vdivsd(a, b, src, k, ROUNDING);
40472        transmute(r)
40473    }
40474}
40475
40476/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40477///
40478/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40479/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40480/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40481/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40482/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40484///
40485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
40486#[inline]
40487#[target_feature(enable = "avx512f")]
40488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40489#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40490#[rustc_legacy_const_generics(3)]
40491pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40492    unsafe {
40493        static_assert_rounding!(ROUNDING);
40494        let a = a.as_f64x2();
40495        let b = b.as_f64x2();
40496        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
40497        transmute(r)
40498    }
40499}
40500
40501/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40502/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40503///
40504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
40505#[inline]
40506#[target_feature(enable = "avx512f")]
40507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40508#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40509#[rustc_legacy_const_generics(2)]
40510pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40511    unsafe {
40512        static_assert_sae!(SAE);
40513        let a = a.as_f32x4();
40514        let b = b.as_f32x4();
40515        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
40516        transmute(r)
40517    }
40518}
40519
40520/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40521/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40522///
40523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
40524#[inline]
40525#[target_feature(enable = "avx512f")]
40526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40527#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40528#[rustc_legacy_const_generics(4)]
40529pub fn _mm_mask_max_round_ss<const SAE: i32>(
40530    src: __m128,
40531    k: __mmask8,
40532    a: __m128,
40533    b: __m128,
40534) -> __m128 {
40535    unsafe {
40536        static_assert_sae!(SAE);
40537        let a = a.as_f32x4();
40538        let b = b.as_f32x4();
40539        let src = src.as_f32x4();
40540        let r = vmaxss(a, b, src, k, SAE);
40541        transmute(r)
40542    }
40543}
40544
40545/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40546/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40547///
40548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
40549#[inline]
40550#[target_feature(enable = "avx512f")]
40551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40552#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40553#[rustc_legacy_const_generics(3)]
40554pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40555    unsafe {
40556        static_assert_sae!(SAE);
40557        let a = a.as_f32x4();
40558        let b = b.as_f32x4();
40559        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
40560        transmute(r)
40561    }
40562}
40563
40564/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40566///
40567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
40568#[inline]
40569#[target_feature(enable = "avx512f")]
40570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40571#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40572#[rustc_legacy_const_generics(2)]
40573pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40574    unsafe {
40575        static_assert_sae!(SAE);
40576        let a = a.as_f64x2();
40577        let b = b.as_f64x2();
40578        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
40579        transmute(r)
40580    }
40581}
40582
40583/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40584/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40585///
40586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
40587#[inline]
40588#[target_feature(enable = "avx512f")]
40589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40590#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40591#[rustc_legacy_const_generics(4)]
40592pub fn _mm_mask_max_round_sd<const SAE: i32>(
40593    src: __m128d,
40594    k: __mmask8,
40595    a: __m128d,
40596    b: __m128d,
40597) -> __m128d {
40598    unsafe {
40599        static_assert_sae!(SAE);
40600        let a = a.as_f64x2();
40601        let b = b.as_f64x2();
40602        let src = src.as_f64x2();
40603        let r = vmaxsd(a, b, src, k, SAE);
40604        transmute(r)
40605    }
40606}
40607
40608/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40609/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40610///
40611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
40612#[inline]
40613#[target_feature(enable = "avx512f")]
40614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40615#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40616#[rustc_legacy_const_generics(3)]
40617pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40618    unsafe {
40619        static_assert_sae!(SAE);
40620        let a = a.as_f64x2();
40621        let b = b.as_f64x2();
40622        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
40623        transmute(r)
40624    }
40625}
40626
40627/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40628/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40629///
40630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
40631#[inline]
40632#[target_feature(enable = "avx512f")]
40633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40634#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40635#[rustc_legacy_const_generics(2)]
40636pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40637    unsafe {
40638        static_assert_sae!(SAE);
40639        let a = a.as_f32x4();
40640        let b = b.as_f32x4();
40641        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
40642        transmute(r)
40643    }
40644}
40645
40646/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40647/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40648///
40649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
40650#[inline]
40651#[target_feature(enable = "avx512f")]
40652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40653#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40654#[rustc_legacy_const_generics(4)]
40655pub fn _mm_mask_min_round_ss<const SAE: i32>(
40656    src: __m128,
40657    k: __mmask8,
40658    a: __m128,
40659    b: __m128,
40660) -> __m128 {
40661    unsafe {
40662        static_assert_sae!(SAE);
40663        let a = a.as_f32x4();
40664        let b = b.as_f32x4();
40665        let src = src.as_f32x4();
40666        let r = vminss(a, b, src, k, SAE);
40667        transmute(r)
40668    }
40669}
40670
40671/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40673///
40674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
40675#[inline]
40676#[target_feature(enable = "avx512f")]
40677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40678#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40679#[rustc_legacy_const_generics(3)]
40680pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40681    unsafe {
40682        static_assert_sae!(SAE);
40683        let a = a.as_f32x4();
40684        let b = b.as_f32x4();
40685        let r = vminss(a, b, f32x4::ZERO, k, SAE);
40686        transmute(r)
40687    }
40688}
40689
40690/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\
40691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40692///
40693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
40694#[inline]
40695#[target_feature(enable = "avx512f")]
40696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40697#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40698#[rustc_legacy_const_generics(2)]
40699pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40700    unsafe {
40701        static_assert_sae!(SAE);
40702        let a = a.as_f64x2();
40703        let b = b.as_f64x2();
40704        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
40705        transmute(r)
40706    }
40707}
40708
40709/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40711///
40712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
40713#[inline]
40714#[target_feature(enable = "avx512f")]
40715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40716#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40717#[rustc_legacy_const_generics(4)]
40718pub fn _mm_mask_min_round_sd<const SAE: i32>(
40719    src: __m128d,
40720    k: __mmask8,
40721    a: __m128d,
40722    b: __m128d,
40723) -> __m128d {
40724    unsafe {
40725        static_assert_sae!(SAE);
40726        let a = a.as_f64x2();
40727        let b = b.as_f64x2();
40728        let src = src.as_f64x2();
40729        let r = vminsd(a, b, src, k, SAE);
40730        transmute(r)
40731    }
40732}
40733
40734/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40735/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40736///
40737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
40738#[inline]
40739#[target_feature(enable = "avx512f")]
40740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40741#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40742#[rustc_legacy_const_generics(3)]
40743pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40744    unsafe {
40745        static_assert_sae!(SAE);
40746        let a = a.as_f64x2();
40747        let b = b.as_f64x2();
40748        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
40749        transmute(r)
40750    }
40751}
40752
40753/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40754///
40755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40761///
40762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        // Compile-time validation that ROUNDING is one of the supported
        // embedded-rounding encodings.
        static_assert_rounding!(ROUNDING);
        // Mask 0b1 unconditionally writes lane 0, so the zeroed "src"
        // operand is never merged into the result.
        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
    }
}
40774
40775/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40776///
40777/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40778/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40779/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40780/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40781/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40782/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40783///
40784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        // Compile-time validation that ROUNDING is one of the supported
        // embedded-rounding encodings.
        static_assert_rounding!(ROUNDING);
        // Bit 0 of `k` selects between sqrt(b[0]) and src[0] for lane 0.
        vsqrtss(a, b, src, k, ROUNDING)
    }
}
40801
40802/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40803///
40804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40810///
40811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        // Compile-time validation that ROUNDING is one of the supported
        // embedded-rounding encodings.
        static_assert_rounding!(ROUNDING);
        // Zero-masking: the all-zero "src" supplies lane 0 when bit 0 of
        // `k` is clear.
        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
    }
}
40823
40824/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40825///
40826/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40827/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40828/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40829/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40830/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40831/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40832///
40833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        // Compile-time validation that ROUNDING is one of the supported
        // embedded-rounding encodings.
        static_assert_rounding!(ROUNDING);
        // Mask 0b1 unconditionally writes lane 0, so the zeroed "src"
        // operand is never merged into the result.
        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
    }
}
40845
40846/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40847///
40848/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40849/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40850/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40851/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40852/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40853/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40854///
40855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        // Compile-time validation that ROUNDING is one of the supported
        // embedded-rounding encodings.
        static_assert_rounding!(ROUNDING);
        // Bit 0 of `k` selects between sqrt(b[0]) and src[0] for lane 0.
        vsqrtsd(a, b, src, k, ROUNDING)
    }
}
40872
40873/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40874///
40875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40881///
40882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        // Compile-time validation that ROUNDING is one of the supported
        // embedded-rounding encodings.
        static_assert_rounding!(ROUNDING);
        // Zero-masking: the all-zero "src" supplies lane 0 when bit 0 of
        // `k` is clear.
        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
    }
}
40898
40899/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40900/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40901///
40902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
40903#[inline]
40904#[target_feature(enable = "avx512f")]
40905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40906#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40907#[rustc_legacy_const_generics(2)]
40908pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40909    unsafe {
40910        static_assert_sae!(SAE);
40911        let a = a.as_f32x4();
40912        let b = b.as_f32x4();
40913        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
40914        transmute(r)
40915    }
40916}
40917
40918/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40926#[rustc_legacy_const_generics(4)]
40927pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
40928    src: __m128,
40929    k: __mmask8,
40930    a: __m128,
40931    b: __m128,
40932) -> __m128 {
40933    unsafe {
40934        static_assert_sae!(SAE);
40935        let a = a.as_f32x4();
40936        let b = b.as_f32x4();
40937        let src = src.as_f32x4();
40938        let r = vgetexpss(a, b, src, k, SAE);
40939        transmute(r)
40940    }
40941}
40942
40943/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40944/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40945///
40946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
40947#[inline]
40948#[target_feature(enable = "avx512f")]
40949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40950#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40951#[rustc_legacy_const_generics(3)]
40952pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40953    unsafe {
40954        static_assert_sae!(SAE);
40955        let a = a.as_f32x4();
40956        let b = b.as_f32x4();
40957        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
40958        transmute(r)
40959    }
40960}
40961
40962/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40963/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40964///
40965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
40966#[inline]
40967#[target_feature(enable = "avx512f")]
40968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40969#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40970#[rustc_legacy_const_generics(2)]
40971pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40972    unsafe {
40973        static_assert_sae!(SAE);
40974        let a = a.as_f64x2();
40975        let b = b.as_f64x2();
40976        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
40977        transmute(r)
40978    }
40979}
40980
40981/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40982/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40983///
40984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
40985#[inline]
40986#[target_feature(enable = "avx512f")]
40987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40988#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40989#[rustc_legacy_const_generics(4)]
40990pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
40991    src: __m128d,
40992    k: __mmask8,
40993    a: __m128d,
40994    b: __m128d,
40995) -> __m128d {
40996    unsafe {
40997        static_assert_sae!(SAE);
40998        let a = a.as_f64x2();
40999        let b = b.as_f64x2();
41000        let src = src.as_f64x2();
41001        let r = vgetexpsd(a, b, src, k, SAE);
41002        transmute(r)
41003    }
41004}
41005
41006/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
41007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41008///
41009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
41010#[inline]
41011#[target_feature(enable = "avx512f")]
41012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41013#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
41014#[rustc_legacy_const_generics(3)]
41015pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
41016    unsafe {
41017        static_assert_sae!(SAE);
41018        let a = a.as_f64x2();
41019        let b = b.as_f64x2();
41020        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
41021        transmute(r)
41022    }
41023}
41024
41025/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41026/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41027///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41028///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41029///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41030///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41031/// The sign is determined by sc which can take the following values:\
41032///    _MM_MANT_SIGN_src     // sign = sign(src)\
41033///    _MM_MANT_SIGN_zero    // sign = 0\
41034///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41035/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41036///
41037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
41038#[inline]
41039#[target_feature(enable = "avx512f")]
41040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41041#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41042#[rustc_legacy_const_generics(2, 3, 4)]
41043pub fn _mm_getmant_round_ss<
41044    const NORM: _MM_MANTISSA_NORM_ENUM,
41045    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41046    const SAE: i32,
41047>(
41048    a: __m128,
41049    b: __m128,
41050) -> __m128 {
41051    unsafe {
41052        static_assert_uimm_bits!(NORM, 4);
41053        static_assert_uimm_bits!(SIGN, 2);
41054        static_assert_mantissas_sae!(SAE);
41055        let a = a.as_f32x4();
41056        let b = b.as_f32x4();
41057        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
41058        transmute(r)
41059    }
41060}
41061
41062/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41063/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41064///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41065///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41066///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41067///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41068/// The sign is determined by sc which can take the following values:\
41069///    _MM_MANT_SIGN_src     // sign = sign(src)\
41070///    _MM_MANT_SIGN_zero    // sign = 0\
41071///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41072/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41073///
41074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
41075#[inline]
41076#[target_feature(enable = "avx512f")]
41077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41078#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41079#[rustc_legacy_const_generics(4, 5, 6)]
41080pub fn _mm_mask_getmant_round_ss<
41081    const NORM: _MM_MANTISSA_NORM_ENUM,
41082    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41083    const SAE: i32,
41084>(
41085    src: __m128,
41086    k: __mmask8,
41087    a: __m128,
41088    b: __m128,
41089) -> __m128 {
41090    unsafe {
41091        static_assert_uimm_bits!(NORM, 4);
41092        static_assert_uimm_bits!(SIGN, 2);
41093        static_assert_mantissas_sae!(SAE);
41094        let a = a.as_f32x4();
41095        let b = b.as_f32x4();
41096        let src = src.as_f32x4();
41097        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
41098        transmute(r)
41099    }
41100}
41101
41102/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41103/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41104///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41105///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41106///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41107///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41108/// The sign is determined by sc which can take the following values:\
41109///    _MM_MANT_SIGN_src     // sign = sign(src)\
41110///    _MM_MANT_SIGN_zero    // sign = 0\
41111///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41112/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41113///
41114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
41115#[inline]
41116#[target_feature(enable = "avx512f")]
41117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41118#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41119#[rustc_legacy_const_generics(3, 4, 5)]
41120pub fn _mm_maskz_getmant_round_ss<
41121    const NORM: _MM_MANTISSA_NORM_ENUM,
41122    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41123    const SAE: i32,
41124>(
41125    k: __mmask8,
41126    a: __m128,
41127    b: __m128,
41128) -> __m128 {
41129    unsafe {
41130        static_assert_uimm_bits!(NORM, 4);
41131        static_assert_uimm_bits!(SIGN, 2);
41132        static_assert_mantissas_sae!(SAE);
41133        let a = a.as_f32x4();
41134        let b = b.as_f32x4();
41135        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
41136        transmute(r)
41137    }
41138}
41139
41140/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41141/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41142///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41143///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41144///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41145///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41146/// The sign is determined by sc which can take the following values:\
41147///    _MM_MANT_SIGN_src     // sign = sign(src)\
41148///    _MM_MANT_SIGN_zero    // sign = 0\
41149///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41151///
41152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
41153#[inline]
41154#[target_feature(enable = "avx512f")]
41155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41156#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41157#[rustc_legacy_const_generics(2, 3, 4)]
41158pub fn _mm_getmant_round_sd<
41159    const NORM: _MM_MANTISSA_NORM_ENUM,
41160    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41161    const SAE: i32,
41162>(
41163    a: __m128d,
41164    b: __m128d,
41165) -> __m128d {
41166    unsafe {
41167        static_assert_uimm_bits!(NORM, 4);
41168        static_assert_uimm_bits!(SIGN, 2);
41169        static_assert_mantissas_sae!(SAE);
41170        let a = a.as_f64x2();
41171        let b = b.as_f64x2();
41172        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
41173        transmute(r)
41174    }
41175}
41176
41177/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41178/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41179///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41180///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41181///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41182///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41183/// The sign is determined by sc which can take the following values:\
41184///    _MM_MANT_SIGN_src     // sign = sign(src)\
41185///    _MM_MANT_SIGN_zero    // sign = 0\
41186///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41187/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41188///
41189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
41190#[inline]
41191#[target_feature(enable = "avx512f")]
41192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41193#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41194#[rustc_legacy_const_generics(4, 5, 6)]
41195pub fn _mm_mask_getmant_round_sd<
41196    const NORM: _MM_MANTISSA_NORM_ENUM,
41197    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41198    const SAE: i32,
41199>(
41200    src: __m128d,
41201    k: __mmask8,
41202    a: __m128d,
41203    b: __m128d,
41204) -> __m128d {
41205    unsafe {
41206        static_assert_uimm_bits!(NORM, 4);
41207        static_assert_uimm_bits!(SIGN, 2);
41208        static_assert_mantissas_sae!(SAE);
41209        let a = a.as_f64x2();
41210        let b = b.as_f64x2();
41211        let src = src.as_f64x2();
41212        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
41213        transmute(r)
41214    }
41215}
41216
41217/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41218/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41219///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41220///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41221///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41222///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41223/// The sign is determined by sc which can take the following values:\
41224///    _MM_MANT_SIGN_src     // sign = sign(src)\
41225///    _MM_MANT_SIGN_zero    // sign = 0\
41226///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41227/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41228///
41229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
41230#[inline]
41231#[target_feature(enable = "avx512f")]
41232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41233#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41234#[rustc_legacy_const_generics(3, 4, 5)]
41235pub fn _mm_maskz_getmant_round_sd<
41236    const NORM: _MM_MANTISSA_NORM_ENUM,
41237    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41238    const SAE: i32,
41239>(
41240    k: __mmask8,
41241    a: __m128d,
41242    b: __m128d,
41243) -> __m128d {
41244    unsafe {
41245        static_assert_uimm_bits!(NORM, 4);
41246        static_assert_uimm_bits!(SIGN, 2);
41247        static_assert_mantissas_sae!(SAE);
41248        let a = a.as_f64x2();
41249        let b = b.as_f64x2();
41250        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
41251        transmute(r)
41252    }
41253}
41254
41255/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41256/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41257/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41258/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41259/// * [`_MM_FROUND_TO_POS_INF`] : round up
41260/// * [`_MM_FROUND_TO_ZERO`] : truncate
41261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41262///
41263/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
41265#[inline]
41266#[target_feature(enable = "avx512f")]
41267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41268#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41269#[rustc_legacy_const_generics(2, 3)]
41270pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
41271    unsafe {
41272        static_assert_uimm_bits!(IMM8, 8);
41273        static_assert_mantissas_sae!(SAE);
41274        let a = a.as_f32x4();
41275        let b = b.as_f32x4();
41276        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
41277        transmute(r)
41278    }
41279}
41280
41281/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41282/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41283/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41284/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41285/// * [`_MM_FROUND_TO_POS_INF`] : round up
41286/// * [`_MM_FROUND_TO_ZERO`] : truncate
41287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41288///
41289/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
41291#[inline]
41292#[target_feature(enable = "avx512f")]
41293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41294#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41295#[rustc_legacy_const_generics(4, 5)]
41296pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41297    src: __m128,
41298    k: __mmask8,
41299    a: __m128,
41300    b: __m128,
41301) -> __m128 {
41302    unsafe {
41303        static_assert_uimm_bits!(IMM8, 8);
41304        static_assert_mantissas_sae!(SAE);
41305        let a = a.as_f32x4();
41306        let b = b.as_f32x4();
41307        let src = src.as_f32x4();
41308        let r = vrndscaless(a, b, src, k, IMM8, SAE);
41309        transmute(r)
41310    }
41311}
41312
41313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41317/// * [`_MM_FROUND_TO_POS_INF`] : round up
41318/// * [`_MM_FROUND_TO_ZERO`] : truncate
41319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41320///
41321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
41323#[inline]
41324#[target_feature(enable = "avx512f")]
41325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41327#[rustc_legacy_const_generics(3, 4)]
41328pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41329    k: __mmask8,
41330    a: __m128,
41331    b: __m128,
41332) -> __m128 {
41333    unsafe {
41334        static_assert_uimm_bits!(IMM8, 8);
41335        static_assert_mantissas_sae!(SAE);
41336        let a = a.as_f32x4();
41337        let b = b.as_f32x4();
41338        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
41339        transmute(r)
41340    }
41341}
41342
41343/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41344/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41345/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41346/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41347/// * [`_MM_FROUND_TO_POS_INF`] : round up
41348/// * [`_MM_FROUND_TO_ZERO`] : truncate
41349/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41350///
41351/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
41353#[inline]
41354#[target_feature(enable = "avx512f")]
41355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41356#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41357#[rustc_legacy_const_generics(2, 3)]
41358pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
41359    unsafe {
41360        static_assert_uimm_bits!(IMM8, 8);
41361        static_assert_mantissas_sae!(SAE);
41362        let a = a.as_f64x2();
41363        let b = b.as_f64x2();
41364        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
41365        transmute(r)
41366    }
41367}
41368
41369/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41370/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41371/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41372/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41373/// * [`_MM_FROUND_TO_POS_INF`] : round up
41374/// * [`_MM_FROUND_TO_ZERO`] : truncate
41375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41376///
41377/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
41379#[inline]
41380#[target_feature(enable = "avx512f")]
41381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41382#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41383#[rustc_legacy_const_generics(4, 5)]
41384pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41385    src: __m128d,
41386    k: __mmask8,
41387    a: __m128d,
41388    b: __m128d,
41389) -> __m128d {
41390    unsafe {
41391        static_assert_uimm_bits!(IMM8, 8);
41392        static_assert_mantissas_sae!(SAE);
41393        let a = a.as_f64x2();
41394        let b = b.as_f64x2();
41395        let src = src.as_f64x2();
41396        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
41397        transmute(r)
41398    }
41399}
41400
41401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41405/// * [`_MM_FROUND_TO_POS_INF`] : round up
41406/// * [`_MM_FROUND_TO_ZERO`] : truncate
41407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41408///
41409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
41411#[inline]
41412#[target_feature(enable = "avx512f")]
41413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41415#[rustc_legacy_const_generics(3, 4)]
41416pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41417    k: __mmask8,
41418    a: __m128d,
41419    b: __m128d,
41420) -> __m128d {
41421    unsafe {
41422        static_assert_uimm_bits!(IMM8, 8);
41423        static_assert_mantissas_sae!(SAE);
41424        let a = a.as_f64x2();
41425        let b = b.as_f64x2();
41426        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
41427        transmute(r)
41428    }
41429}
41430
41431/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41432///
41433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41439///
41440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
41441#[inline]
41442#[target_feature(enable = "avx512f")]
41443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41444#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41445#[rustc_legacy_const_generics(2)]
41446pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
41447    unsafe {
41448        static_assert_rounding!(ROUNDING);
41449        let a = a.as_f32x4();
41450        let b = b.as_f32x4();
41451        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41452        transmute(r)
41453    }
41454}
41455
41456/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41457///
41458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41464///
41465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
41466#[inline]
41467#[target_feature(enable = "avx512f")]
41468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41469#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41470#[rustc_legacy_const_generics(4)]
41471pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
41472    src: __m128,
41473    k: __mmask8,
41474    a: __m128,
41475    b: __m128,
41476) -> __m128 {
41477    unsafe {
41478        static_assert_rounding!(ROUNDING);
41479        let a = a.as_f32x4();
41480        let b = b.as_f32x4();
41481        let src = src.as_f32x4();
41482        let r = vscalefss(a, b, src, k, ROUNDING);
41483        transmute(r)
41484    }
41485}
41486
41487/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41488///
41489/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41490/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41491/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41492/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41493/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41494/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41495///
41496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
41497#[inline]
41498#[target_feature(enable = "avx512f")]
41499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41500#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41501#[rustc_legacy_const_generics(3)]
41502pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
41503    unsafe {
41504        static_assert_rounding!(ROUNDING);
41505        let a = a.as_f32x4();
41506        let b = b.as_f32x4();
41507        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
41508        transmute(r)
41509    }
41510}
41511
41512/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41513///
41514/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41515/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41516/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41517/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41518/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41519/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41520///
41521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
41522#[inline]
41523#[target_feature(enable = "avx512f")]
41524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41525#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41526#[rustc_legacy_const_generics(2)]
41527pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
41528    unsafe {
41529        static_assert_rounding!(ROUNDING);
41530        let a = a.as_f64x2();
41531        let b = b.as_f64x2();
41532        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
41533        transmute(r)
41534    }
41535}
41536
41537/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41538///
41539/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41545///
41546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
41547#[inline]
41548#[target_feature(enable = "avx512f")]
41549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41550#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41551#[rustc_legacy_const_generics(4)]
41552pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
41553    src: __m128d,
41554    k: __mmask8,
41555    a: __m128d,
41556    b: __m128d,
41557) -> __m128d {
41558    unsafe {
41559        let a = a.as_f64x2();
41560        let b = b.as_f64x2();
41561        let src = src.as_f64x2();
41562        let r = vscalefsd(a, b, src, k, ROUNDING);
41563        transmute(r)
41564    }
41565}
41566
41567/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41568///
41569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41575///
41576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
41577#[inline]
41578#[target_feature(enable = "avx512f")]
41579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41580#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41581#[rustc_legacy_const_generics(3)]
41582pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
41583    k: __mmask8,
41584    a: __m128d,
41585    b: __m128d,
41586) -> __m128d {
41587    unsafe {
41588        static_assert_rounding!(ROUNDING);
41589        let a = a.as_f64x2();
41590        let b = b.as_f64x2();
41591        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
41592        transmute(r)
41593    }
41594}
41595
41596/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41597///
41598/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41599/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41600/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41601/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41602/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41603/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41604///
41605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
41606#[inline]
41607#[target_feature(enable = "avx512f")]
41608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41609#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41610#[rustc_legacy_const_generics(3)]
41611pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
41612    unsafe {
41613        static_assert_rounding!(ROUNDING);
41614        let extracta: f32 = simd_extract!(a, 0);
41615        let extractb: f32 = simd_extract!(b, 0);
41616        let extractc: f32 = simd_extract!(c, 0);
41617        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
41618        simd_insert!(a, 0, r)
41619    }
41620}
41621
41622/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41623///
41624/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41625/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41626/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41627/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41628/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41630///
41631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
41632#[inline]
41633#[target_feature(enable = "avx512f")]
41634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41635#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41636#[rustc_legacy_const_generics(4)]
41637pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
41638    a: __m128,
41639    k: __mmask8,
41640    b: __m128,
41641    c: __m128,
41642) -> __m128 {
41643    unsafe {
41644        static_assert_rounding!(ROUNDING);
41645        let mut fmadd: f32 = simd_extract!(a, 0);
41646        if (k & 0b00000001) != 0 {
41647            let extractb: f32 = simd_extract!(b, 0);
41648            let extractc: f32 = simd_extract!(c, 0);
41649            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
41650        }
41651        simd_insert!(a, 0, fmadd)
41652    }
41653}
41654
41655/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41656///
41657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41663///
41664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
41665#[inline]
41666#[target_feature(enable = "avx512f")]
41667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41668#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41669#[rustc_legacy_const_generics(4)]
41670pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
41671    k: __mmask8,
41672    a: __m128,
41673    b: __m128,
41674    c: __m128,
41675) -> __m128 {
41676    unsafe {
41677        static_assert_rounding!(ROUNDING);
41678        let mut fmadd: f32 = 0.;
41679        if (k & 0b00000001) != 0 {
41680            let extracta: f32 = simd_extract!(a, 0);
41681            let extractb: f32 = simd_extract!(b, 0);
41682            let extractc: f32 = simd_extract!(c, 0);
41683            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
41684        }
41685        simd_insert!(a, 0, fmadd)
41686    }
41687}
41688
41689/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
41690///
41691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41697///
41698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
41699#[inline]
41700#[target_feature(enable = "avx512f")]
41701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41702#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41703#[rustc_legacy_const_generics(4)]
41704pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
41705    a: __m128,
41706    b: __m128,
41707    c: __m128,
41708    k: __mmask8,
41709) -> __m128 {
41710    unsafe {
41711        static_assert_rounding!(ROUNDING);
41712        let mut fmadd: f32 = simd_extract!(c, 0);
41713        if (k & 0b00000001) != 0 {
41714            let extracta: f32 = simd_extract!(a, 0);
41715            let extractb: f32 = simd_extract!(b, 0);
41716            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
41717        }
41718        simd_insert!(c, 0, fmadd)
41719    }
41720}
41721
41722/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41723///
41724/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41725/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41726/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41727/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41728/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41729/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41730///
41731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
41732#[inline]
41733#[target_feature(enable = "avx512f")]
41734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41735#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41736#[rustc_legacy_const_generics(3)]
41737pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
41738    unsafe {
41739        static_assert_rounding!(ROUNDING);
41740        let extracta: f64 = simd_extract!(a, 0);
41741        let extractb: f64 = simd_extract!(b, 0);
41742        let extractc: f64 = simd_extract!(c, 0);
41743        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
41744        simd_insert!(a, 0, fmadd)
41745    }
41746}
41747
41748/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41749///
41750/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41751/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41752/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41753/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41754/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41755/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41756///
41757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
41758#[inline]
41759#[target_feature(enable = "avx512f")]
41760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41761#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41762#[rustc_legacy_const_generics(4)]
41763pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
41764    a: __m128d,
41765    k: __mmask8,
41766    b: __m128d,
41767    c: __m128d,
41768) -> __m128d {
41769    unsafe {
41770        static_assert_rounding!(ROUNDING);
41771        let mut fmadd: f64 = simd_extract!(a, 0);
41772        if (k & 0b00000001) != 0 {
41773            let extractb: f64 = simd_extract!(b, 0);
41774            let extractc: f64 = simd_extract!(c, 0);
41775            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
41776        }
41777        simd_insert!(a, 0, fmadd)
41778    }
41779}
41780
41781/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41782///
41783/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41784/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41785/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41786/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41787/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41788/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41789///
41790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
41791#[inline]
41792#[target_feature(enable = "avx512f")]
41793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41794#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41795#[rustc_legacy_const_generics(4)]
41796pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
41797    k: __mmask8,
41798    a: __m128d,
41799    b: __m128d,
41800    c: __m128d,
41801) -> __m128d {
41802    unsafe {
41803        static_assert_rounding!(ROUNDING);
41804        let mut fmadd: f64 = 0.;
41805        if (k & 0b00000001) != 0 {
41806            let extracta: f64 = simd_extract!(a, 0);
41807            let extractb: f64 = simd_extract!(b, 0);
41808            let extractc: f64 = simd_extract!(c, 0);
41809            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
41810        }
41811        simd_insert!(a, 0, fmadd)
41812    }
41813}
41814
41815/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
41816///
41817/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41818/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41819/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41820/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41821/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41822/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41823///
41824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
41825#[inline]
41826#[target_feature(enable = "avx512f")]
41827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41828#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41829#[rustc_legacy_const_generics(4)]
41830pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
41831    a: __m128d,
41832    b: __m128d,
41833    c: __m128d,
41834    k: __mmask8,
41835) -> __m128d {
41836    unsafe {
41837        static_assert_rounding!(ROUNDING);
41838        let mut fmadd: f64 = simd_extract!(c, 0);
41839        if (k & 0b00000001) != 0 {
41840            let extracta: f64 = simd_extract!(a, 0);
41841            let extractb: f64 = simd_extract!(b, 0);
41842            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
41843        }
41844        simd_insert!(c, 0, fmadd)
41845    }
41846}
41847
41848/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41849///
41850/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41851/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41852/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41853/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41854/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41855/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41856///
41857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
41858#[inline]
41859#[target_feature(enable = "avx512f")]
41860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41861#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41862#[rustc_legacy_const_generics(3)]
41863pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
41864    unsafe {
41865        static_assert_rounding!(ROUNDING);
41866        let extracta: f32 = simd_extract!(a, 0);
41867        let extractb: f32 = simd_extract!(b, 0);
41868        let extractc: f32 = simd_extract!(c, 0);
41869        let extractc = -extractc;
41870        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
41871        simd_insert!(a, 0, fmsub)
41872    }
41873}
41874
41875/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41876///
41877/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41878/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41879/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41880/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41881/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41882/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41883///
41884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
41885#[inline]
41886#[target_feature(enable = "avx512f")]
41887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41888#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41889#[rustc_legacy_const_generics(4)]
41890pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
41891    a: __m128,
41892    k: __mmask8,
41893    b: __m128,
41894    c: __m128,
41895) -> __m128 {
41896    unsafe {
41897        static_assert_rounding!(ROUNDING);
41898        let mut fmsub: f32 = simd_extract!(a, 0);
41899        if (k & 0b00000001) != 0 {
41900            let extractb: f32 = simd_extract!(b, 0);
41901            let extractc: f32 = simd_extract!(c, 0);
41902            let extractc = -extractc;
41903            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
41904        }
41905        simd_insert!(a, 0, fmsub)
41906    }
41907}
41908
41909/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41910///
41911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41917///
41918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
41919#[inline]
41920#[target_feature(enable = "avx512f")]
41921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41922#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41923#[rustc_legacy_const_generics(4)]
41924pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
41925    k: __mmask8,
41926    a: __m128,
41927    b: __m128,
41928    c: __m128,
41929) -> __m128 {
41930    unsafe {
41931        static_assert_rounding!(ROUNDING);
41932        let mut fmsub: f32 = 0.;
41933        if (k & 0b00000001) != 0 {
41934            let extracta: f32 = simd_extract!(a, 0);
41935            let extractb: f32 = simd_extract!(b, 0);
41936            let extractc: f32 = simd_extract!(c, 0);
41937            let extractc = -extractc;
41938            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
41939        }
41940        simd_insert!(a, 0, fmsub)
41941    }
41942}
41943
41944/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
41945///
41946/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41947/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41948/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41949/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41950/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41951/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41952///
41953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
41954#[inline]
41955#[target_feature(enable = "avx512f")]
41956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41957#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41958#[rustc_legacy_const_generics(4)]
41959pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
41960    a: __m128,
41961    b: __m128,
41962    c: __m128,
41963    k: __mmask8,
41964) -> __m128 {
41965    unsafe {
41966        static_assert_rounding!(ROUNDING);
41967        let mut fmsub: f32 = simd_extract!(c, 0);
41968        if (k & 0b00000001) != 0 {
41969            let extracta: f32 = simd_extract!(a, 0);
41970            let extractb: f32 = simd_extract!(b, 0);
41971            let extractc = -fmsub;
41972            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
41973        }
41974        simd_insert!(c, 0, fmsub)
41975    }
41976}
41977
41978/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41979///
41980/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41981/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41982/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41983/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41984/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41985/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41986///
41987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
41988#[inline]
41989#[target_feature(enable = "avx512f")]
41990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41991#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41992#[rustc_legacy_const_generics(3)]
41993pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
41994    unsafe {
41995        static_assert_rounding!(ROUNDING);
41996        let extracta: f64 = simd_extract!(a, 0);
41997        let extractb: f64 = simd_extract!(b, 0);
41998        let extractc: f64 = simd_extract!(c, 0);
41999        let extractc = -extractc;
42000        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42001        simd_insert!(a, 0, fmsub)
42002    }
42003}
42004
42005/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42006///
42007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42013///
42014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
42015#[inline]
42016#[target_feature(enable = "avx512f")]
42017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42019#[rustc_legacy_const_generics(4)]
42020pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
42021    a: __m128d,
42022    k: __mmask8,
42023    b: __m128d,
42024    c: __m128d,
42025) -> __m128d {
42026    unsafe {
42027        static_assert_rounding!(ROUNDING);
42028        let mut fmsub: f64 = simd_extract!(a, 0);
42029        if (k & 0b00000001) != 0 {
42030            let extractb: f64 = simd_extract!(b, 0);
42031            let extractc: f64 = simd_extract!(c, 0);
42032            let extractc = -extractc;
42033            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
42034        }
42035        simd_insert!(a, 0, fmsub)
42036    }
42037}
42038
42039/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42040///
42041/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42042/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42043/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42044/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42045/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42046/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42047///
42048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
42049#[inline]
42050#[target_feature(enable = "avx512f")]
42051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42052#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42053#[rustc_legacy_const_generics(4)]
42054pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
42055    k: __mmask8,
42056    a: __m128d,
42057    b: __m128d,
42058    c: __m128d,
42059) -> __m128d {
42060    unsafe {
42061        static_assert_rounding!(ROUNDING);
42062        let mut fmsub: f64 = 0.;
42063        if (k & 0b00000001) != 0 {
42064            let extracta: f64 = simd_extract!(a, 0);
42065            let extractb: f64 = simd_extract!(b, 0);
42066            let extractc: f64 = simd_extract!(c, 0);
42067            let extractc = -extractc;
42068            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42069        }
42070        simd_insert!(a, 0, fmsub)
42071    }
42072}
42073
42074/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42075///
42076/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42077/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42078/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42079/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42080/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42081/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42082///
42083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
42084#[inline]
42085#[target_feature(enable = "avx512f")]
42086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42087#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42088#[rustc_legacy_const_generics(4)]
42089pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
42090    a: __m128d,
42091    b: __m128d,
42092    c: __m128d,
42093    k: __mmask8,
42094) -> __m128d {
42095    unsafe {
42096        static_assert_rounding!(ROUNDING);
42097        let mut fmsub: f64 = simd_extract!(c, 0);
42098        if (k & 0b00000001) != 0 {
42099            let extracta: f64 = simd_extract!(a, 0);
42100            let extractb: f64 = simd_extract!(b, 0);
42101            let extractc = -fmsub;
42102            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42103        }
42104        simd_insert!(c, 0, fmsub)
42105    }
42106}
42107
42108/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
42109///
42110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42111/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42112/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42113/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42114/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42115/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42116///
42117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
42118#[inline]
42119#[target_feature(enable = "avx512f")]
42120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42121#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42122#[rustc_legacy_const_generics(3)]
42123pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42124    unsafe {
42125        static_assert_rounding!(ROUNDING);
42126        let extracta: f32 = simd_extract!(a, 0);
42127        let extracta = -extracta;
42128        let extractb: f32 = simd_extract!(b, 0);
42129        let extractc: f32 = simd_extract!(c, 0);
42130        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42131        simd_insert!(a, 0, fnmadd)
42132    }
42133}
42134
42135/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42136///
42137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42143///
42144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
42145#[inline]
42146#[target_feature(enable = "avx512f")]
42147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42148#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42149#[rustc_legacy_const_generics(4)]
42150pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
42151    a: __m128,
42152    k: __mmask8,
42153    b: __m128,
42154    c: __m128,
42155) -> __m128 {
42156    unsafe {
42157        static_assert_rounding!(ROUNDING);
42158        let mut fnmadd: f32 = simd_extract!(a, 0);
42159        if (k & 0b00000001) != 0 {
42160            let extracta = -fnmadd;
42161            let extractb: f32 = simd_extract!(b, 0);
42162            let extractc: f32 = simd_extract!(c, 0);
42163            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42164        }
42165        simd_insert!(a, 0, fnmadd)
42166    }
42167}
42168
42169/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42170///
42171/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42172/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42173/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42174/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42175/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42176/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42177///
42178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
42179#[inline]
42180#[target_feature(enable = "avx512f")]
42181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42182#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42183#[rustc_legacy_const_generics(4)]
42184pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
42185    k: __mmask8,
42186    a: __m128,
42187    b: __m128,
42188    c: __m128,
42189) -> __m128 {
42190    unsafe {
42191        static_assert_rounding!(ROUNDING);
42192        let mut fnmadd: f32 = 0.;
42193        if (k & 0b00000001) != 0 {
42194            let extracta: f32 = simd_extract!(a, 0);
42195            let extracta = -extracta;
42196            let extractb: f32 = simd_extract!(b, 0);
42197            let extractc: f32 = simd_extract!(c, 0);
42198            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42199        }
42200        simd_insert!(a, 0, fnmadd)
42201    }
42202}
42203
42204/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42205///
42206/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42207/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42208/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42209/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42210/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42212///
42213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
42214#[inline]
42215#[target_feature(enable = "avx512f")]
42216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42217#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42218#[rustc_legacy_const_generics(4)]
42219pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
42220    a: __m128,
42221    b: __m128,
42222    c: __m128,
42223    k: __mmask8,
42224) -> __m128 {
42225    unsafe {
42226        static_assert_rounding!(ROUNDING);
42227        let mut fnmadd: f32 = simd_extract!(c, 0);
42228        if (k & 0b00000001) != 0 {
42229            let extracta: f32 = simd_extract!(a, 0);
42230            let extracta = -extracta;
42231            let extractb: f32 = simd_extract!(b, 0);
42232            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
42233        }
42234        simd_insert!(c, 0, fnmadd)
42235    }
42236}
42237
42238/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42239///
42240/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42241/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42242/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42243/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42244/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42245/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42246///
42247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
42248#[inline]
42249#[target_feature(enable = "avx512f")]
42250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42251#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42252#[rustc_legacy_const_generics(3)]
42253pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42254    unsafe {
42255        static_assert_rounding!(ROUNDING);
42256        let extracta: f64 = simd_extract!(a, 0);
42257        let extracta = -extracta;
42258        let extractb: f64 = simd_extract!(b, 0);
42259        let extractc: f64 = simd_extract!(c, 0);
42260        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42261        simd_insert!(a, 0, fnmadd)
42262    }
42263}
42264
42265/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42266///
42267/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42273///
42274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
42275#[inline]
42276#[target_feature(enable = "avx512f")]
42277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42278#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42279#[rustc_legacy_const_generics(4)]
42280pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
42281    a: __m128d,
42282    k: __mmask8,
42283    b: __m128d,
42284    c: __m128d,
42285) -> __m128d {
42286    unsafe {
42287        static_assert_rounding!(ROUNDING);
42288        let mut fnmadd: f64 = simd_extract!(a, 0);
42289        if (k & 0b00000001) != 0 {
42290            let extracta = -fnmadd;
42291            let extractb: f64 = simd_extract!(b, 0);
42292            let extractc: f64 = simd_extract!(c, 0);
42293            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42294        }
42295        simd_insert!(a, 0, fnmadd)
42296    }
42297}
42298
42299/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42300///
42301/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42302/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42303/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42304/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42305/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42306/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42307///
42308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
42309#[inline]
42310#[target_feature(enable = "avx512f")]
42311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42312#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42313#[rustc_legacy_const_generics(4)]
42314pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
42315    k: __mmask8,
42316    a: __m128d,
42317    b: __m128d,
42318    c: __m128d,
42319) -> __m128d {
42320    unsafe {
42321        static_assert_rounding!(ROUNDING);
42322        let mut fnmadd: f64 = 0.;
42323        if (k & 0b00000001) != 0 {
42324            let extracta: f64 = simd_extract!(a, 0);
42325            let extracta = -extracta;
42326            let extractb: f64 = simd_extract!(b, 0);
42327            let extractc: f64 = simd_extract!(c, 0);
42328            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42329        }
42330        simd_insert!(a, 0, fnmadd)
42331    }
42332}
42333
42334/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42335///
42336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42342///
42343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
42344#[inline]
42345#[target_feature(enable = "avx512f")]
42346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42348#[rustc_legacy_const_generics(4)]
42349pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
42350    a: __m128d,
42351    b: __m128d,
42352    c: __m128d,
42353    k: __mmask8,
42354) -> __m128d {
42355    unsafe {
42356        static_assert_rounding!(ROUNDING);
42357        let mut fnmadd: f64 = simd_extract!(c, 0);
42358        if (k & 0b00000001) != 0 {
42359            let extracta: f64 = simd_extract!(a, 0);
42360            let extracta = -extracta;
42361            let extractb: f64 = simd_extract!(b, 0);
42362            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
42363        }
42364        simd_insert!(c, 0, fnmadd)
42365    }
42366}
42367
42368/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
42369///
42370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42376///
42377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
42378#[inline]
42379#[target_feature(enable = "avx512f")]
42380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42381#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42382#[rustc_legacy_const_generics(3)]
42383pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42384    unsafe {
42385        static_assert_rounding!(ROUNDING);
42386        let extracta: f32 = simd_extract!(a, 0);
42387        let extracta = -extracta;
42388        let extractb: f32 = simd_extract!(b, 0);
42389        let extractc: f32 = simd_extract!(c, 0);
42390        let extractc = -extractc;
42391        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42392        simd_insert!(a, 0, fnmsub)
42393    }
42394}
42395
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // Lower lane defaults to a's lower element when mask bit 0 is clear.
        let mut fnmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            // fnmsub(a, b, c) = -(a*b) - c, computed as fma(-a, b, -c).
            let extracta = -fnmsub;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
42430
42431/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42432///
42433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42439///
42440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
42441#[inline]
42442#[target_feature(enable = "avx512f")]
42443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42444#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42445#[rustc_legacy_const_generics(4)]
42446pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
42447    k: __mmask8,
42448    a: __m128,
42449    b: __m128,
42450    c: __m128,
42451) -> __m128 {
42452    unsafe {
42453        static_assert_rounding!(ROUNDING);
42454        let mut fnmsub: f32 = 0.;
42455        if (k & 0b00000001) != 0 {
42456            let extracta: f32 = simd_extract!(a, 0);
42457            let extracta = -extracta;
42458            let extractb: f32 = simd_extract!(b, 0);
42459            let extractc: f32 = simd_extract!(c, 0);
42460            let extractc = -extractc;
42461            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42462        }
42463        simd_insert!(a, 0, fnmsub)
42464    }
42465}
42466
42467/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42468///
42469/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42475///
42476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
42477#[inline]
42478#[target_feature(enable = "avx512f")]
42479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42480#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42481#[rustc_legacy_const_generics(4)]
42482pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
42483    a: __m128,
42484    b: __m128,
42485    c: __m128,
42486    k: __mmask8,
42487) -> __m128 {
42488    unsafe {
42489        static_assert_rounding!(ROUNDING);
42490        let mut fnmsub: f32 = simd_extract!(c, 0);
42491        if (k & 0b00000001) != 0 {
42492            let extracta: f32 = simd_extract!(a, 0);
42493            let extracta = -extracta;
42494            let extractb: f32 = simd_extract!(b, 0);
42495            let extractc = -fnmsub;
42496            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42497        }
42498        simd_insert!(c, 0, fnmsub)
42499    }
42500}
42501
42502/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42503///
42504/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42505/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42506/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42507/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42508/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42509/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42510///
42511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
42512#[inline]
42513#[target_feature(enable = "avx512f")]
42514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42515#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42516#[rustc_legacy_const_generics(3)]
42517pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42518    unsafe {
42519        static_assert_rounding!(ROUNDING);
42520        let extracta: f64 = simd_extract!(a, 0);
42521        let extracta = -extracta;
42522        let extractb: f64 = simd_extract!(b, 0);
42523        let extractc: f64 = simd_extract!(c, 0);
42524        let extractc = -extractc;
42525        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42526        simd_insert!(a, 0, fnmsub)
42527    }
42528}
42529
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // Lower lane defaults to a's lower element when mask bit 0 is clear.
        let mut fnmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            // fnmsub(a, b, c) = -(a*b) - c, computed as fma(-a, b, -c).
            let extracta = -fnmsub;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
42564
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        // Lower lane is zeroed when mask bit 0 is clear.
        let mut fnmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            // fnmsub(a, b, c) = -(a*b) - c, computed as fma(-a, b, -c).
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
42600
42601/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42602///
42603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42609///
42610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
42611#[inline]
42612#[target_feature(enable = "avx512f")]
42613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42614#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42615#[rustc_legacy_const_generics(4)]
42616pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
42617    a: __m128d,
42618    b: __m128d,
42619    c: __m128d,
42620    k: __mmask8,
42621) -> __m128d {
42622    unsafe {
42623        static_assert_rounding!(ROUNDING);
42624        let mut fnmsub: f64 = simd_extract!(c, 0);
42625        if (k & 0b00000001) != 0 {
42626            let extracta: f64 = simd_extract!(a, 0);
42627            let extracta = -extracta;
42628            let extractb: f64 = simd_extract!(b, 0);
42629            let extractc = -fnmsub;
42630            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42631        }
42632        simd_insert!(c, 0, fnmsub)
42633    }
42634}
42635
42636/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42637///
42638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
42639#[inline]
42640#[target_feature(enable = "avx512f")]
42641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42642#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42643#[rustc_legacy_const_generics(3)]
42644pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
42645    unsafe {
42646        static_assert_uimm_bits!(IMM8, 8);
42647        let a = a.as_f32x4();
42648        let b = b.as_f32x4();
42649        let c = c.as_i32x4();
42650        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
42651        let fixupimm: f32 = simd_extract!(r, 0);
42652        let r = simd_insert!(a, 0, fixupimm);
42653        transmute(r)
42654    }
42655}
42656
42657/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42658///
42659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
42660#[inline]
42661#[target_feature(enable = "avx512f")]
42662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42663#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42664#[rustc_legacy_const_generics(4)]
42665pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
42666    a: __m128,
42667    k: __mmask8,
42668    b: __m128,
42669    c: __m128i,
42670) -> __m128 {
42671    unsafe {
42672        static_assert_uimm_bits!(IMM8, 8);
42673        let a = a.as_f32x4();
42674        let b = b.as_f32x4();
42675        let c = c.as_i32x4();
42676        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42677        let fixupimm: f32 = simd_extract!(fixupimm, 0);
42678        let r = simd_insert!(a, 0, fixupimm);
42679        transmute(r)
42680    }
42681}
42682
42683/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42684///
42685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
42686#[inline]
42687#[target_feature(enable = "avx512f")]
42688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42689#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42690#[rustc_legacy_const_generics(4)]
42691pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
42692    k: __mmask8,
42693    a: __m128,
42694    b: __m128,
42695    c: __m128i,
42696) -> __m128 {
42697    unsafe {
42698        static_assert_uimm_bits!(IMM8, 8);
42699        let a = a.as_f32x4();
42700        let b = b.as_f32x4();
42701        let c = c.as_i32x4();
42702        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42703        let fixupimm: f32 = simd_extract!(fixupimm, 0);
42704        let r = simd_insert!(a, 0, fixupimm);
42705        transmute(r)
42706    }
42707}
42708
42709/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42710///
42711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
42712#[inline]
42713#[target_feature(enable = "avx512f")]
42714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42715#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42716#[rustc_legacy_const_generics(3)]
42717pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
42718    unsafe {
42719        static_assert_uimm_bits!(IMM8, 8);
42720        let a = a.as_f64x2();
42721        let b = b.as_f64x2();
42722        let c = c.as_i64x2();
42723        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
42724        let fixupimm: f64 = simd_extract!(fixupimm, 0);
42725        let r = simd_insert!(a, 0, fixupimm);
42726        transmute(r)
42727    }
42728}
42729
42730/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42731///
42732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
42733#[inline]
42734#[target_feature(enable = "avx512f")]
42735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42736#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42737#[rustc_legacy_const_generics(4)]
42738pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
42739    a: __m128d,
42740    k: __mmask8,
42741    b: __m128d,
42742    c: __m128i,
42743) -> __m128d {
42744    unsafe {
42745        static_assert_uimm_bits!(IMM8, 8);
42746        let a = a.as_f64x2();
42747        let b = b.as_f64x2();
42748        let c = c.as_i64x2();
42749        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42750        let fixupimm: f64 = simd_extract!(fixupimm, 0);
42751        let r = simd_insert!(a, 0, fixupimm);
42752        transmute(r)
42753    }
42754}
42755
42756/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42757///
42758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
42759#[inline]
42760#[target_feature(enable = "avx512f")]
42761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42762#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42763#[rustc_legacy_const_generics(4)]
42764pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
42765    k: __mmask8,
42766    a: __m128d,
42767    b: __m128d,
42768    c: __m128i,
42769) -> __m128d {
42770    unsafe {
42771        static_assert_uimm_bits!(IMM8, 8);
42772        let a = a.as_f64x2();
42773        let b = b.as_f64x2();
42774        let c = c.as_i64x2();
42775        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42776        let fixupimm: f64 = simd_extract!(fixupimm, 0);
42777        let r = simd_insert!(a, 0, fixupimm);
42778        transmute(r)
42779    }
42780}
42781
42782/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42784///
42785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
42786#[inline]
42787#[target_feature(enable = "avx512f")]
42788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42789#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42790#[rustc_legacy_const_generics(3, 4)]
42791pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42792    a: __m128,
42793    b: __m128,
42794    c: __m128i,
42795) -> __m128 {
42796    unsafe {
42797        static_assert_uimm_bits!(IMM8, 8);
42798        static_assert_mantissas_sae!(SAE);
42799        let a = a.as_f32x4();
42800        let b = b.as_f32x4();
42801        let c = c.as_i32x4();
42802        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
42803        let fixupimm: f32 = simd_extract!(r, 0);
42804        let r = simd_insert!(a, 0, fixupimm);
42805        transmute(r)
42806    }
42807}
42808
42809/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42810/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42811///
42812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
42813#[inline]
42814#[target_feature(enable = "avx512f")]
42815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42816#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42817#[rustc_legacy_const_generics(4, 5)]
42818pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42819    a: __m128,
42820    k: __mmask8,
42821    b: __m128,
42822    c: __m128i,
42823) -> __m128 {
42824    unsafe {
42825        static_assert_uimm_bits!(IMM8, 8);
42826        static_assert_mantissas_sae!(SAE);
42827        let a = a.as_f32x4();
42828        let b = b.as_f32x4();
42829        let c = c.as_i32x4();
42830        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
42831        let fixupimm: f32 = simd_extract!(r, 0);
42832        let r = simd_insert!(a, 0, fixupimm);
42833        transmute(r)
42834    }
42835}
42836
42837/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42838/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42839///
42840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
42841#[inline]
42842#[target_feature(enable = "avx512f")]
42843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42844#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42845#[rustc_legacy_const_generics(4, 5)]
42846pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42847    k: __mmask8,
42848    a: __m128,
42849    b: __m128,
42850    c: __m128i,
42851) -> __m128 {
42852    unsafe {
42853        static_assert_uimm_bits!(IMM8, 8);
42854        static_assert_mantissas_sae!(SAE);
42855        let a = a.as_f32x4();
42856        let b = b.as_f32x4();
42857        let c = c.as_i32x4();
42858        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
42859        let fixupimm: f32 = simd_extract!(r, 0);
42860        let r = simd_insert!(a, 0, fixupimm);
42861        transmute(r)
42862    }
42863}
42864
42865/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42866/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42867///
42868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
42869#[inline]
42870#[target_feature(enable = "avx512f")]
42871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42873#[rustc_legacy_const_generics(3, 4)]
42874pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42875    a: __m128d,
42876    b: __m128d,
42877    c: __m128i,
42878) -> __m128d {
42879    unsafe {
42880        static_assert_uimm_bits!(IMM8, 8);
42881        static_assert_mantissas_sae!(SAE);
42882        let a = a.as_f64x2();
42883        let b = b.as_f64x2();
42884        let c = c.as_i64x2();
42885        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
42886        let fixupimm: f64 = simd_extract!(r, 0);
42887        let r = simd_insert!(a, 0, fixupimm);
42888        transmute(r)
42889    }
42890}
42891
42892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42893/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42894///
42895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
42896#[inline]
42897#[target_feature(enable = "avx512f")]
42898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42899#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42900#[rustc_legacy_const_generics(4, 5)]
42901pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42902    a: __m128d,
42903    k: __mmask8,
42904    b: __m128d,
42905    c: __m128i,
42906) -> __m128d {
42907    unsafe {
42908        static_assert_uimm_bits!(IMM8, 8);
42909        static_assert_mantissas_sae!(SAE);
42910        let a = a.as_f64x2();
42911        let b = b.as_f64x2();
42912        let c = c.as_i64x2();
42913        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
42914        let fixupimm: f64 = simd_extract!(r, 0);
42915        let r = simd_insert!(a, 0, fixupimm);
42916        transmute(r)
42917    }
42918}
42919
42920/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42921/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42922///
42923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
42924#[inline]
42925#[target_feature(enable = "avx512f")]
42926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42927#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42928#[rustc_legacy_const_generics(4, 5)]
42929pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42930    k: __mmask8,
42931    a: __m128d,
42932    b: __m128d,
42933    c: __m128i,
42934) -> __m128d {
42935    unsafe {
42936        static_assert_uimm_bits!(IMM8, 8);
42937        static_assert_mantissas_sae!(SAE);
42938        let a = a.as_f64x2();
42939        let b = b.as_f64x2();
42940        let c = c.as_i64x2();
42941        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
42942        let fixupimm: f64 = simd_extract!(r, 0);
42943        let r = simd_insert!(a, 0, fixupimm);
42944        transmute(r)
42945    }
42946}
42947
42948/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42949///
42950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
42951#[inline]
42952#[target_feature(enable = "avx512f")]
42953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42954#[cfg_attr(test, assert_instr(vcvtss2sd))]
42955pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42956    unsafe {
42957        transmute(vcvtss2sd(
42958            a.as_f64x2(),
42959            b.as_f32x4(),
42960            src.as_f64x2(),
42961            k,
42962            _MM_FROUND_CUR_DIRECTION,
42963        ))
42964    }
42965}
42966
42967/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42968///
42969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
42970#[inline]
42971#[target_feature(enable = "avx512f")]
42972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42973#[cfg_attr(test, assert_instr(vcvtss2sd))]
42974pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42975    unsafe {
42976        transmute(vcvtss2sd(
42977            a.as_f64x2(),
42978            b.as_f32x4(),
42979            f64x2::ZERO,
42980            k,
42981            _MM_FROUND_CUR_DIRECTION,
42982        ))
42983    }
42984}
42985
42986/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
42987///
42988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
42989#[inline]
42990#[target_feature(enable = "avx512f")]
42991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42992#[cfg_attr(test, assert_instr(vcvtsd2ss))]
42993pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
42994    unsafe {
42995        transmute(vcvtsd2ss(
42996            a.as_f32x4(),
42997            b.as_f64x2(),
42998            src.as_f32x4(),
42999            k,
43000            _MM_FROUND_CUR_DIRECTION,
43001        ))
43002    }
43003}
43004
43005/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
43006///
43007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
43008#[inline]
43009#[target_feature(enable = "avx512f")]
43010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43011#[cfg_attr(test, assert_instr(vcvtsd2ss))]
43012pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43013    unsafe {
43014        transmute(vcvtsd2ss(
43015            a.as_f32x4(),
43016            b.as_f64x2(),
43017            f32x4::ZERO,
43018            k,
43019            _MM_FROUND_CUR_DIRECTION,
43020        ))
43021    }
43022}
43023
43024/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
43025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43026///
43027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
43028#[inline]
43029#[target_feature(enable = "avx512f")]
43030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43031#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43032#[rustc_legacy_const_generics(2)]
43033pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
43034    unsafe {
43035        static_assert_sae!(SAE);
43036        let a = a.as_f64x2();
43037        let b = b.as_f32x4();
43038        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
43039        transmute(r)
43040    }
43041}
43042
43043/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43044/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43045///
43046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
43047#[inline]
43048#[target_feature(enable = "avx512f")]
43049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43050#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43051#[rustc_legacy_const_generics(4)]
43052pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
43053    src: __m128d,
43054    k: __mmask8,
43055    a: __m128d,
43056    b: __m128,
43057) -> __m128d {
43058    unsafe {
43059        static_assert_sae!(SAE);
43060        let a = a.as_f64x2();
43061        let b = b.as_f32x4();
43062        let src = src.as_f64x2();
43063        let r = vcvtss2sd(a, b, src, k, SAE);
43064        transmute(r)
43065    }
43066}
43067
43068/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43070///
43071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
43072#[inline]
43073#[target_feature(enable = "avx512f")]
43074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43075#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43076#[rustc_legacy_const_generics(3)]
43077pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
43078    unsafe {
43079        static_assert_sae!(SAE);
43080        let a = a.as_f64x2();
43081        let b = b.as_f32x4();
43082        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
43083        transmute(r)
43084    }
43085}
43086
43087/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43088/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43089/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43090/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43091/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43092/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43093/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43094///
43095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
43096#[inline]
43097#[target_feature(enable = "avx512f")]
43098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43099#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43100#[rustc_legacy_const_generics(2)]
43101pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
43102    unsafe {
43103        static_assert_rounding!(ROUNDING);
43104        let a = a.as_f32x4();
43105        let b = b.as_f64x2();
43106        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
43107        transmute(r)
43108    }
43109}
43110
43111/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43112/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43113/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43114/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43115/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43116/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43117/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43118///
43119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
43120#[inline]
43121#[target_feature(enable = "avx512f")]
43122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43123#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43124#[rustc_legacy_const_generics(4)]
43125pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
43126    src: __m128,
43127    k: __mmask8,
43128    a: __m128,
43129    b: __m128d,
43130) -> __m128 {
43131    unsafe {
43132        static_assert_rounding!(ROUNDING);
43133        let a = a.as_f32x4();
43134        let b = b.as_f64x2();
43135        let src = src.as_f32x4();
43136        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
43137        transmute(r)
43138    }
43139}
43140
43141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43142/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43143/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43144/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43145/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43146/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43147/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43148///
43149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
43150#[inline]
43151#[target_feature(enable = "avx512f")]
43152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43153#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43154#[rustc_legacy_const_generics(3)]
43155pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43156    unsafe {
43157        static_assert_rounding!(ROUNDING);
43158        let a = a.as_f32x4();
43159        let b = b.as_f64x2();
43160        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
43161        transmute(r)
43162    }
43163}
43164
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
    unsafe {
        // Compile-time check that ROUNDING is one of the encodings listed above.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        // The LLVM intrinsic converts lane 0 with the explicit rounding override.
        vcvtss2si(a, ROUNDING)
    }
}
43186
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
    unsafe {
        // Same implementation as _mm_cvt_roundss_si32; Intel exposes both names.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        vcvtss2si(a, ROUNDING)
    }
}
43208
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
    unsafe {
        // Compile-time check that ROUNDING is one of the encodings listed above.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        // Unsigned variant: lowers to vcvtss2usi instead of vcvtss2si.
        vcvtss2usi(a, ROUNDING)
    }
}
43230
43231/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43232///
43233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
43234#[inline]
43235#[target_feature(enable = "avx512f")]
43236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43237#[cfg_attr(test, assert_instr(vcvtss2si))]
43238pub fn _mm_cvtss_i32(a: __m128) -> i32 {
43239    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43240}
43241
43242/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43243///
43244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
43245#[inline]
43246#[target_feature(enable = "avx512f")]
43247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43248#[cfg_attr(test, assert_instr(vcvtss2usi))]
43249pub fn _mm_cvtss_u32(a: __m128) -> u32 {
43250    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43251}
43252
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
    unsafe {
        // Compile-time check that ROUNDING is one of the encodings listed above.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        vcvtsd2si(a, ROUNDING)
    }
}
43274
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
    unsafe {
        // Same implementation as _mm_cvt_roundsd_si32; Intel exposes both names.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        vcvtsd2si(a, ROUNDING)
    }
}
43296
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
    unsafe {
        // Compile-time check that ROUNDING is one of the encodings listed above.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        // Unsigned variant: lowers to vcvtsd2usi instead of vcvtsd2si.
        vcvtsd2usi(a, ROUNDING)
    }
}
43318
43319/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43320///
43321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
43322#[inline]
43323#[target_feature(enable = "avx512f")]
43324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43325#[cfg_attr(test, assert_instr(vcvtsd2si))]
43326pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
43327    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43328}
43329
43330/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43331///
43332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
43333#[inline]
43334#[target_feature(enable = "avx512f")]
43335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43336#[cfg_attr(test, assert_instr(vcvtsd2usi))]
43337pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
43338    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43339}
43340
/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    unsafe {
        // Compile-time check that ROUNDING is one of the encodings listed above.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        // The intrinsic merges the converted scalar into lane 0 of `a`.
        let r = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
    }
}
43364
/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    unsafe {
        // Same implementation as _mm_cvt_roundi32_ss; Intel exposes both names.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
    }
}
43388
/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
    unsafe {
        // Compile-time check that ROUNDING is one of the encodings listed above.
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        // Unsigned variant: lowers to vcvtusi2ss instead of vcvtsi2ss.
        let r = vcvtusi2ss(a, b, ROUNDING);
        transmute(r)
    }
}
43411
43412/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
43413///
43414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
43415#[inline]
43416#[target_feature(enable = "avx512f")]
43417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43418#[cfg_attr(test, assert_instr(vcvtsi2ss))]
43419#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43420pub const fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
43421    unsafe {
43422        let b = b as f32;
43423        simd_insert!(a, 0, b)
43424    }
43425}
43426
43427/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
43428///
43429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
43430#[inline]
43431#[target_feature(enable = "avx512f")]
43432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43433#[cfg_attr(test, assert_instr(vcvtsi2sd))]
43434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43435pub const fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
43436    unsafe {
43437        let b = b as f64;
43438        simd_insert!(a, 0, b)
43439    }
43440}
43441
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
    unsafe {
        // Compile-time check of the suppress-all-exceptions encoding.
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        // vcvttss2si truncates toward zero regardless of MXCSR.RC.
        vcvttss2si(a, SAE)
    }
}
43458
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
    unsafe {
        // Same implementation as _mm_cvtt_roundss_si32; Intel exposes both names.
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2si(a, SAE)
    }
}
43475
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
    unsafe {
        // Compile-time check of the suppress-all-exceptions encoding.
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        // Unsigned variant: lowers to vcvttss2usi instead of vcvttss2si.
        vcvttss2usi(a, SAE)
    }
}
43492
43493/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
43494///
43495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
43496#[inline]
43497#[target_feature(enable = "avx512f")]
43498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43499#[cfg_attr(test, assert_instr(vcvttss2si))]
43500pub fn _mm_cvttss_i32(a: __m128) -> i32 {
43501    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43502}
43503
43504/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
43505///
43506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
43507#[inline]
43508#[target_feature(enable = "avx512f")]
43509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43510#[cfg_attr(test, assert_instr(vcvttss2usi))]
43511pub fn _mm_cvttss_u32(a: __m128) -> u32 {
43512    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43513}
43514
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
    unsafe {
        // Compile-time check of the suppress-all-exceptions encoding.
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2si(a, SAE)
    }
}
43531
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
    unsafe {
        // Same implementation as _mm_cvtt_roundsd_si32; Intel exposes both names.
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2si(a, SAE)
    }
}
43548
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
    unsafe {
        // Compile-time check of the suppress-all-exceptions encoding.
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        // Unsigned variant: lowers to vcvttsd2usi instead of vcvttsd2si.
        vcvttsd2usi(a, SAE)
    }
}
43565
43566/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
43567///
43568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
43569#[inline]
43570#[target_feature(enable = "avx512f")]
43571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43572#[cfg_attr(test, assert_instr(vcvttsd2si))]
43573pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
43574    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43575}
43576
43577/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
43578///
43579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
43580#[inline]
43581#[target_feature(enable = "avx512f")]
43582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43583#[cfg_attr(test, assert_instr(vcvttsd2usi))]
43584pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
43585    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43586}
43587
43588/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
43589///
43590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
43591#[inline]
43592#[target_feature(enable = "avx512f")]
43593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43594#[cfg_attr(test, assert_instr(vcvtusi2ss))]
43595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43596pub const fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
43597    unsafe {
43598        let b = b as f32;
43599        simd_insert!(a, 0, b)
43600    }
43601}
43602
43603/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
43604///
43605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
43606#[inline]
43607#[target_feature(enable = "avx512f")]
43608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43609#[cfg_attr(test, assert_instr(vcvtusi2sd))]
43610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43611pub const fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
43612    unsafe {
43613        let b = b as f64;
43614        simd_insert!(a, 0, b)
43615    }
43616}
43617
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
    unsafe {
        // The comparison predicate must fit in 5 bits.
        static_assert_uimm_bits!(IMM5, 5);
        // SAE encoding is validated at compile time.
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        vcomiss(a, b, IMM5, SAE)
    }
}
43636
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
    unsafe {
        // The comparison predicate must fit in 5 bits.
        static_assert_uimm_bits!(IMM5, 5);
        // SAE encoding is validated at compile time.
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        vcomisd(a, b, IMM5, SAE)
    }
}
43655
// Comparison-predicate constants of type `_MM_CMPINT_ENUM` (values 0x00..=0x07).
/// Equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
/// Less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
/// False
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
/// Not-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
/// Not less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
/// Not less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
/// True
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
43680
// Normalization-interval selectors of type `_MM_MANTISSA_NORM_ENUM` (values 0x00..=0x03).
/// interval [1, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
/// interval [0.5, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
/// interval [0.5, 1)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
/// interval [0.75, 1.5)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
43693
// Sign-control selectors of type `_MM_MANTISSA_SIGN_ENUM` (values 0x00..=0x02).
/// sign = sign(SRC)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
/// sign = 0
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
/// DEST = NaN if sign(SRC) = 1
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
43703
// `_MM_PERM_ENUM` selector encodings. Each name is a base-4 digit string read
// high-to-low with A=0, B=1, C=2, D=3, packed two bits per letter — e.g.
// ABCD = 0b00_01_10_11 = 0x1B, AACD = 0b00_00_10_11 = 0x0B.
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
43800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43801pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
43802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43803pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
43804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43805pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
43806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43807pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
43808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43809pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
43810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43811pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
43812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43813pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
43814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43815pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
43816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43817pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
43818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43819pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
43820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43821pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
43822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43823pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
43824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43825pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
43826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43827pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
43828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43829pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
43830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43831pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
43832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43833pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
43834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43835pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
43836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43837pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
43838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43839pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
43840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43841pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
43842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43843pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
43844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43845pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
43846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43847pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
43848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43849pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
43850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43851pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
43852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43853pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
43854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43855pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
43856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43857pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
43858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43859pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
43860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43861pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
43862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43863pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
43864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43865pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
43866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43867pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
43868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43869pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
43870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43871pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
43872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43873pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
43874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43875pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
43876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43877pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
43878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43879pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
43880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43881pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
43882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43883pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
43884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43885pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
43886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43887pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
43888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43889pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
43890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43891pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
43892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43893pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
43894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43895pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
43896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43897pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
43898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43899pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
43900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43901pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
43902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43903pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
43904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43905pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
43906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43907pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
43908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43909pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
43910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43911pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
43912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43913pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
43914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43915pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
43916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43917pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
43918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43919pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
43920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43921pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
43922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43923pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
43924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43925pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
43926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43927pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
43928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43929pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
43930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43931pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
43932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43933pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
43934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43935pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
43936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43937pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
43938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43939pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
43940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43941pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
43942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43943pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
43944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43945pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
43946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43947pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
43948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43949pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
43950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43951pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
43952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43953pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
43954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43955pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
43956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43957pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
43958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43959pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
43960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43961pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
43962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43963pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
43964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43965pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
43966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43967pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
43968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43969pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
43970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43971pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
43972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43973pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
43974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43975pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
43976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43977pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
43978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43979pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
43980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43981pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
43982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43983pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
43984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43985pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
43986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43987pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
43988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43989pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
43990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43991pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
43992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43993pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
43994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43995pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
43996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43997pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
43998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43999pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
44000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44001pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
44002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44003pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
44004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44005pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
44006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44007pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
44008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44009pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
44010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44011pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
44012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44013pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
44014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44015pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
44016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44017pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
44018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44019pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
44020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44021pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
44022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44023pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
44024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44025pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
44026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44027pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
44028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44029pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
44030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44031pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
44032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44033pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
44034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44035pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
44036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44037pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
44038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44039pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
44040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44041pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
44042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44043pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
44044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44045pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
44046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44047pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
44048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44049pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
44050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44051pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
44052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44053pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
44054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44055pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
44056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44057pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
44058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44059pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
44060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44061pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
44062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44063pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
44064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44065pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
44066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44067pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
44068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44069pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
44070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44071pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
44072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44073pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
44074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44075pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
44076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44077pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
44078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44079pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
44080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44081pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
44082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44083pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
44084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44085pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
44086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44087pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
44088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44089pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
44090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44091pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
44092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44093pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
44094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44095pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
44096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44097pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
44098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44099pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
44100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44101pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
44102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44103pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
44104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44105pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
44106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44107pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
44108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44109pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
44110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44111pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
44112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44113pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
44114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44115pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
44116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44117pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
44118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44119pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
44120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44121pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
44122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44123pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
44124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44125pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
44126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44127pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
44128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44129pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
44130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44131pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
44132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44133pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
44134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44135pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
44136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44137pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
44138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44139pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
44140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44141pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
44142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44143pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
44144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44145pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
44146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44147pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
44148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44149pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
44150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44151pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
44152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44153pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
44154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44155pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
44156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44157pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
44158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44159pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
44160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44161pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
44162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44163pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
44164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44165pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
44166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44167pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
44168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44169pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
44170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44171pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
44172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44173pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
44174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44175pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
44176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44177pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
44178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44179pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
44180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44181pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
44182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44183pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
44184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44185pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
44186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44187pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
44188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44189pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
44190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44191pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
44192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44193pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
44194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44195pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
44196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44197pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
44198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44199pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
44200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44201pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
44202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44203pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
44204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44205pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
44206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44207pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
44208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44209pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
44210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44211pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
44212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44213pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
44214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44215pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
44216
44217#[allow(improper_ctypes)]
44218unsafe extern "C" {
44219    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
44220    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
44221    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
44222    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
44223
44224    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
44225    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
44226    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
44227    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
44228
44229    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
44230    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
44231    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
44232    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
44233
44234    #[link_name = "llvm.x86.avx512.add.ps.512"]
44235    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44236    #[link_name = "llvm.x86.avx512.add.pd.512"]
44237    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44238    #[link_name = "llvm.x86.avx512.sub.ps.512"]
44239    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44240    #[link_name = "llvm.x86.avx512.sub.pd.512"]
44241    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44242    #[link_name = "llvm.x86.avx512.mul.ps.512"]
44243    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44244    #[link_name = "llvm.x86.avx512.mul.pd.512"]
44245    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44246    #[link_name = "llvm.x86.avx512.div.ps.512"]
44247    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44248    #[link_name = "llvm.x86.avx512.div.pd.512"]
44249    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44250
44251    #[link_name = "llvm.x86.avx512.max.ps.512"]
44252    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44253    #[link_name = "llvm.x86.avx512.max.pd.512"]
44254    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44255    #[link_name = "llvm.x86.avx512.min.ps.512"]
44256    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44257    #[link_name = "llvm.x86.avx512.min.pd.512"]
44258    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44259
44260    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
44261    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
44262
44263    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
44264    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44265    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
44266    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44267
44268    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
44269    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
44270    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
44271    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44272    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
44273    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44274
44275    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
44276    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
44277    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
44278    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
44279    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
44280    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
44281
44282    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
44283    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
44284    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
44285    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
44286    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
44287    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
44288
44289    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
44290    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
44291    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
44292    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
44293    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
44294    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44295
44296    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
44297    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
44298    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
44299    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
44300    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
44301    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44302
// VFIXUPIMM bindings: fix up special floating-point values using a response
// table selected by `imm8` with per-element controls in the integer vector `c`
// (per Intel's VFIXUPIMMPS/PD documentation). 512-bit forms additionally take
// an SAE (suppress-all-exceptions) operand.
44303    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
44304    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44305    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
44306    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44307    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
44308    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44309
44310    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
44311    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44312    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
44313    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44314    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
44315    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44316
// `maskz.*` link names: zero-masking counterparts of the `mask.*` declarations
// above (same shapes; identified by the distinct `z`-suffixed Rust names).
44317    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
44318    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44319    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
44320    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44321    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
44322    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44323
44324    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
44325    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44326    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
44327    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44328    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
44329    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44330
// VPTERNLOG bindings: three-operand bitwise ternary logic where `imm8` is the
// 8-entry truth table applied per bit (per Intel's VPTERNLOGD/Q docs).
// Note these declarations carry no mask parameter — masking, if any, is
// applied by the Rust wrapper functions, not by the intrinsic itself.
44331    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
44332    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
44333    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
44334    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
44335    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
44336    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
44337
44338    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
44339    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
44340    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
44341    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
44342    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
44343    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
44344
// VGETMANT bindings: extract the normalized mantissa of each element;
// `mantissas` packs the immediate interval/sign-control bits (per Intel's
// VGETMANTPS/PD docs). Merge-masked via `src`/`m`; 512-bit forms take SAE.
44345    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
44346    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
44347    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
44348    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
44349    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
44350    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
44351
44352    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
44353    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
44354    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
44355    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
44356    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
44357    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
44358
// VRCP14 / VRSQRT14 bindings: approximate reciprocal and reciprocal square
// root (relative error bounded by 2^-14 per Intel's documentation).
// Merge-masked via `src`/`m`; no rounding/SAE operand in this family.
44359    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
44360    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44361    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
44362    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44363    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
44364    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44365
44366    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
44367    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44368    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
44369    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44370    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
44371    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44372
44373    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
44374    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44375    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
44376    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44377    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
44378    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44379
44380    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
44381    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44382    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
44383    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44384    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
44385    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44386
// Rounding floating-point <-> integer / width conversions. Merge-masked
// where a `src` + mask pair is present; 512-bit forms take an explicit
// `rounding` (or SAE, for widening conversions that cannot round).
44387    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
44388    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
44389
44390    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
44391    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
44392    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
44393    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
44394    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
44395    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
44396
44397    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
44398    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
// NOTE(review): this link name has no width suffix; presumably LLVM names the
// 128-bit masked form plain `llvm.x86.avx512.mask.cvtpd2ps` — verify against
// LLVM's IntrinsicsX86.td before touching.
44399    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps"]
44400    fn vcvtpd2ps128(a: f64x2, src: f32x4, mask: u8) -> f32x4;
44401    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
44402    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
44403
44404    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.128"]
44405    fn vcvtpd2dq128(a: f64x2, src: i32x4, k: u8) -> i32x4;
44406    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
44407    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
44408
44409    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
44410    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
44411    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
44412    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
44413    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
44414    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
44415
// int -> float conversions use LLVM's generic `sitofp/uitofp.round` intrinsics
// rather than the `mask.*` x86 ones; they are unmasked here.
44416    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
44417    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
44418    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
44419    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
44420
// f32 <-> f16 conversions; half-precision lanes are carried as i16 vectors.
// The immediate is named `rounding` on the 512-bit form and `imm8` on the
// narrower forms, but occupies the same operand position.
44421    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
44422    fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
44423    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
44424    fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
44425    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
44426    fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
44427
44428    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
44429    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
44430
// Truncating (round-toward-zero) float -> integer conversions (VCVTT*).
// Despite the name, the 512-bit forms' trailing i32 is an SAE/rounding-control
// slot as passed by the wrappers — same operand position as the family above.
44431    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
44432    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
44433    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
44434    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
44435    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
44436    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
44437
44438    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
44439    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
44440    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
44441    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
44442    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
44443    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
44444
44445    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
44446    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
44447    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
44448    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
44449    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
44450    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
44451
// NOTE(review): the cvttpd2udq trio takes a *signed* i32x* pass-through `src`
// while returning unsigned u32x* — inconsistent with cvtpd2udq above, which
// uses u32x* for both. Bit-pattern-equivalent at the ABI level, but confirm
// against stdarch upstream before "fixing".
44452    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
44453    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
44454    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
44455    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
44456    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
44457    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
44458
// VPMOV register-destination truncating narrows (dword->word, dword->byte,
// qword->word/byte/dword). Results smaller than 128 bits are padded to a full
// 128-bit vector type (e.g. i32x4 -> i16x8); `src`/`mask` merge lanes.
44459    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
44460    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
44461    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
44462    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
44463    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
44464    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
44465
44466    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
44467    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
44468    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
44469    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
44470    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
44471    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
44472    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
44473    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
44474    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
44475    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
44476
// Memory-destination VPMOV stores: narrow each element and write only the
// mask-selected lanes through an untyped `*mut i8`. Three flavours per
// source/destination width: plain truncation (`pmov`), signed-saturating
// (`pmovs`), unsigned-saturating (`pmovus`). The mask width matches the
// source element count (u16 for 16-lane dword sources, u8 otherwise).
44477    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
44478    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44479    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
44480    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44481    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
44482    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44483
44484    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
44485    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44486    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
44487    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44488    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
44489    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44490
44491    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
44492    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44493    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
44494    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44495    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
44496    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44497
44498    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
44499    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44500    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
44501    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44502    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
44503    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44504
44505    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
44506    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44507    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
44508    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44509    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
44510    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44511
44512    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
44513    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44514    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
44515    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44516    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
44517    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44518
44519    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
44520    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44521    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
44522    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44523    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
44524    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44525
44526    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
44527    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44528    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
44529    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44530    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
44531    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44532
44533    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
44534    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44535    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
44536    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44537    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
44538    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44539
44540    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
44541    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44542    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
44543    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44544    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
44545    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44546
44547    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
44548    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44549    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
44550    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44551    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
44552    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44553
44554    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
44555    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44556    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
44557    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44558    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
44559    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44560
44561    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
44562    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44563    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
44564    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44565    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
44566    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44567
44568    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
44569    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44570    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
44571    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44572    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
44573    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44574
44575    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
44576    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44577    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
44578    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44579    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
44580    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44581
// Register-destination narrows, continued: `pmov.qb.512` (plain truncation)
// followed by the saturating families. `pmovs.*` = signed saturation (signed
// operand/return types); `pmovus.*` = unsigned saturation (note the u32x*/
// u64x* operands and u8x*/u16x* returns). `src`/`mask` merge lanes.
44582    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
44583    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
44584
44585    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
44586    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
44587    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
44588    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
44589    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
44590    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
44591
44592    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
44593    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
44594    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
44595    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
44596    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
44597    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
44598
44599    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
44600    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
44601    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
44602    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
44603    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
44604    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
44605
44606    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
44607    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
44608    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
44609    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
44610    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
44611    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
44612
44613    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
44614    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
44615    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
44616    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
44617    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
44618    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
44619
44620    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
44621    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
44622    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
44623    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
44624    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
44625    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
44626
44627    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
44628    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
44629    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
44630    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
44631    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
44632    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
44633
44634    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
44635    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
44636    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
44637    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
44638    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
44639    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
44640
44641    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
44642    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
44643    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
44644    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
44645    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
44646    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
44647
44648    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
44649    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
44650    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
44651    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
44652    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
44653    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
44654
// 512-bit gathers and scatters. Gathers: (src pass-through, byte base pointer,
// per-lane offsets, mask, scale) -> result. Scatters take (base, mask,
// offsets, src, scale) and return nothing. Masks here are *signed* integers
// sized to the lane count (i8/i16), unlike the u8/u16 used elsewhere in this
// extern block.
44655    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
44656    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
44657    #[link_name = "llvm.x86.avx512.gather.dps.512"]
44658    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
44659    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
44660    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
44661    #[link_name = "llvm.x86.avx512.gather.qps.512"]
44662    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
44663    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
44664    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
44665    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
44666    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
44667    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
44668    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
44669    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
44670    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
44671
44672    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
44673    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
44674    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
44675    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
44676    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
44677    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
44678    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
44679    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
44680    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
44681    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
44682
44683    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
44684    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
44685    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
44686    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
44687    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
44688    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
44689
// 128/256-bit scatters (`scattersiv*`/`scatterdiv*`: dword/qword-indexed) and
// gathers (`gather3siv*`/`gather3div*`). These use unsigned `k: u8` masks,
// unlike the signed masks of the 512-bit family above.
44690    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
44691    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
44692    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
44693    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
44694    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
44695    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
44696    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
44697    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
44698    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
44699    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
44700    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
44701    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
44702    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
44703    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
44704    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
44705    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
44706
44707    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
44708    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
44709    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
44710    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
44711    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
44712    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
44713    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
44714    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
44715    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
44716    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
44717    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
44718    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
44719    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
44720    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
44721    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
44722    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
44723
// NOTE(review): two declarations below take `slice: *const u8` while every
// sibling uses `*const i8`. ABI-equivalent (both are raw byte pointers), but
// the inconsistency looks accidental — confirm against stdarch upstream.
44724    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
44725    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
44726    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
44727    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
44728    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
44729    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
44730    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
44731    fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
44732    #[link_name = "llvm.x86.avx512.gather3div4.si"]
44733    fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
44734    #[link_name = "llvm.x86.avx512.gather3div2.di"]
44735    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
44736    #[link_name = "llvm.x86.avx512.gather3div2.df"]
44737    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
44738    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
44739    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
44740
44741    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
44742    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
44743    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
44744    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
44745    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
44746    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
44747    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
44748    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
44749    #[link_name = "llvm.x86.avx512.gather3div8.si"]
44750    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
44751    #[link_name = "llvm.x86.avx512.gather3div4.di"]
44752    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
44753    #[link_name = "llvm.x86.avx512.gather3div4.df"]
44754    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
44755    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
44756    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
44757
// Masked floating-point compares. `op` selects the comparison predicate
// (an `_CMP_*` immediate); the result is a lane bitmask returned as a signed
// integer, further masked by `m`. The `ss`/`sd` forms take whole __m128/__m128d
// arguments; 512-bit and scalar forms carry an SAE operand.
44758    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
44759    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
44760    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
44761    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
44762
44763    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
44764    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
44765    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
44766    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
44767    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
44768    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
44769
44770    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
44771    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
44772    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
44773    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
44774    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
44775    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
44776
// Uniform shifts: all lanes shift by the same count, supplied in a 128-bit
// vector operand (the xmm-count form of PSLL/PSRL/PSRA, per Intel's docs —
// presumably only the low element is used; confirm against the SDM).
// `psra*` are arithmetic (sign-extending) right shifts; the 64-bit `psraq`
// variants are AVX-512-only.
44777    #[link_name = "llvm.x86.avx512.psll.d.512"]
44778    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
44779    #[link_name = "llvm.x86.avx512.psrl.d.512"]
44780    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
44781    #[link_name = "llvm.x86.avx512.psll.q.512"]
44782    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
44783    #[link_name = "llvm.x86.avx512.psrl.q.512"]
44784    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
44785
44786    #[link_name = "llvm.x86.avx512.psra.d.512"]
44787    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
44788
44789    #[link_name = "llvm.x86.avx512.psra.q.512"]
44790    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
44791    #[link_name = "llvm.x86.avx512.psra.q.256"]
44792    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
44793    #[link_name = "llvm.x86.avx512.psra.q.128"]
44794    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
44795
// Variable permutes: `vpermilvar` selects within each 128-bit lane using the
// integer control vector, while `permvar` permutes across the full vector
// using `idx` (per Intel's VPERMILPS/PD and VPERMD/Q/PS/PD docs). Unmasked.
44796    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
44797    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
44798    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
44799    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
44800
44801    #[link_name = "llvm.x86.avx512.permvar.si.512"]
44802    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
44803
44804    #[link_name = "llvm.x86.avx512.permvar.di.512"]
44805    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
44806    #[link_name = "llvm.x86.avx512.permvar.di.256"]
44807    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
44808
44809    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
44810    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
44811
44812    #[link_name = "llvm.x86.avx512.permvar.df.512"]
44813    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
44814    #[link_name = "llvm.x86.avx512.permvar.df.256"]
44815    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
44816
// Two-source full permutes (VPERMI2*): `idx` selects each result lane from
// the concatenation of `a` and `b` (per Intel's VPERMI2 documentation).
// Unmasked at this level; the floating-point forms still use integer `idx`.
44817    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
44818    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
44819    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
44820    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
44821    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
44822    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
44823
44824    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
44825    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
44826    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
44827    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
44828    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
44829    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
44830
44831    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
44832    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
44833    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
44834    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
44835    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
44836    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
44837
44838    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
44839    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
44840    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
44841    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
44842    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
44843    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
44844
// VPCOMPRESS/VCOMPRESS register forms: pack the mask-selected lanes of `a`
// contiguously toward the low end of the result; remaining lanes come from
// `src` (per Intel's compress documentation).
44845    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
44846    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
44847    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
44848    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
44849    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
44850    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
44851
44852    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
44853    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
44854    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
44855    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
44856    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
44857    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
44858
44859    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
44860    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
44861    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
44862    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
44863    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
44864    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
44865
44866    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
44867    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
44868    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
44869    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
44870    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
44871    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
44872
44873    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
44874    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
44875    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
44876    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
44877    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
44878    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
44879
44880    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
44881    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
44882    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
44883    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
44884    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
44885    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
44886
44887    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
44888    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
44889    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
44890    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
44891    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
44892    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
44893
44894    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
44895    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
44896    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
44897    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
44898    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
44899    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
44900
44901    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
44902    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
44903    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
44904    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
44905    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
44906    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
44907
44908    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
44909    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
44910    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
44911    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
44912    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
44913    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
44914
44915    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
44916    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
44917    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
44918    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
44919    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
44920    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
44921
44922    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
44923    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
44924    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
44925    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
44926    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
44927    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
44928
44929    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
44930    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44931    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
44932    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44933    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
44934    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44935    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
44936    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44937    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
44938    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44939    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
44940    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44941    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
44942    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44943    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
44944    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44945    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
44946    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44947    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
44948    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44949    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
44950    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44951    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
44952    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44953    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
44954    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
44955    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
44956    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
44957    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
44958    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44959    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
44960    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44961    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
44962    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
44963    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
44964    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
44965
44966    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
44967    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44968    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
44969    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44970    #[link_name = "llvm.x86.avx512.rcp14.ss"]
44971    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44972    #[link_name = "llvm.x86.avx512.rcp14.sd"]
44973    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44974
44975    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
44976    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
44977    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
44978    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
44979    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
44980    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44981    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
44982    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44983
44984    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
44985    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
44986    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
44987    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
44988
44989    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
44990    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
44991    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
44992    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
44993    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
44994    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
44995    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
44996    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
44997
44998    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
44999    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
45000    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
45001    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
45002
45003    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
45004    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
45005    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
45006    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
45007
45008    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
45009    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
45010    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
45011    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
45012
45013    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
45014    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
45015
45016    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
45017    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
45018
45019    #[link_name = "llvm.x86.avx512.cvttss2si"]
45020    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
45021    #[link_name = "llvm.x86.avx512.cvttss2usi"]
45022    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
45023
45024    #[link_name = "llvm.x86.avx512.cvttsd2si"]
45025    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
45026    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
45027    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
45028
45029    #[link_name = "llvm.x86.avx512.vcomi.ss"]
45030    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
45031    #[link_name = "llvm.x86.avx512.vcomi.sd"]
45032    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
45033
45034    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
45035    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
45036    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
45037    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
45038    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
45039    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
45040    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
45041    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
45042    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
45043    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
45044    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
45045    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
45046    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
45047    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
45048    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
45049    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
45050    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
45051    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
45052    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
45053    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
45054    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
45055    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
45056    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
45057    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
45058
45059}
45060
45061#[cfg(test)]
45062mod tests {
45063    use crate::core_arch::assert_eq_const as assert_eq;
45064
45065    use stdarch_test::simd_test;
45066
45067    use crate::core_arch::x86::*;
45068    use crate::hint::black_box;
45069    use crate::mem::{self};
45070
45071    #[simd_test(enable = "avx512f")]
45072    const fn test_mm512_abs_epi32() {
45073        #[rustfmt::skip]
45074        let a = _mm512_setr_epi32(
45075            0, 1, -1, i32::MAX,
45076            i32::MIN, 100, -100, -32,
45077            0, 1, -1, i32::MAX,
45078            i32::MIN, 100, -100, -32,
45079        );
45080        let r = _mm512_abs_epi32(a);
45081        #[rustfmt::skip]
45082        let e = _mm512_setr_epi32(
45083            0, 1, 1, i32::MAX,
45084            i32::MAX.wrapping_add(1), 100, 100, 32,
45085            0, 1, 1, i32::MAX,
45086            i32::MAX.wrapping_add(1), 100, 100, 32,
45087        );
45088        assert_eq_m512i(r, e);
45089    }
45090
45091    #[simd_test(enable = "avx512f")]
45092    const fn test_mm512_mask_abs_epi32() {
45093        #[rustfmt::skip]
45094        let a = _mm512_setr_epi32(
45095            0, 1, -1, i32::MAX,
45096            i32::MIN, 100, -100, -32,
45097            0, 1, -1, i32::MAX,
45098            i32::MIN, 100, -100, -32,
45099        );
45100        let r = _mm512_mask_abs_epi32(a, 0, a);
45101        assert_eq_m512i(r, a);
45102        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
45103        #[rustfmt::skip]
45104        let e = _mm512_setr_epi32(
45105            0, 1, 1, i32::MAX,
45106            i32::MAX.wrapping_add(1), 100, 100, 32,
45107            0, 1, -1, i32::MAX,
45108            i32::MIN, 100, -100, -32,
45109        );
45110        assert_eq_m512i(r, e);
45111    }
45112
45113    #[simd_test(enable = "avx512f")]
45114    const fn test_mm512_maskz_abs_epi32() {
45115        #[rustfmt::skip]
45116        let a = _mm512_setr_epi32(
45117            0, 1, -1, i32::MAX,
45118            i32::MIN, 100, -100, -32,
45119            0, 1, -1, i32::MAX,
45120            i32::MIN, 100, -100, -32,
45121        );
45122        let r = _mm512_maskz_abs_epi32(0, a);
45123        assert_eq_m512i(r, _mm512_setzero_si512());
45124        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
45125        #[rustfmt::skip]
45126        let e = _mm512_setr_epi32(
45127            0, 1, 1, i32::MAX,
45128            i32::MAX.wrapping_add(1), 100, 100, 32,
45129            0, 0, 0, 0,
45130            0, 0, 0, 0,
45131        );
45132        assert_eq_m512i(r, e);
45133    }
45134
45135    #[simd_test(enable = "avx512f,avx512vl")]
45136    const fn test_mm256_mask_abs_epi32() {
45137        #[rustfmt::skip]
45138        let a = _mm256_setr_epi32(
45139            0, 1, -1, i32::MAX,
45140            i32::MIN, 100, -100, -32,
45141        );
45142        let r = _mm256_mask_abs_epi32(a, 0, a);
45143        assert_eq_m256i(r, a);
45144        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
45145        #[rustfmt::skip]
45146        let e = _mm256_setr_epi32(
45147            0, 1, 1, i32::MAX,
45148            i32::MAX.wrapping_add(1), 100, -100, -32,
45149        );
45150        assert_eq_m256i(r, e);
45151    }
45152
45153    #[simd_test(enable = "avx512f,avx512vl")]
45154    const fn test_mm256_maskz_abs_epi32() {
45155        #[rustfmt::skip]
45156        let a = _mm256_setr_epi32(
45157            0, 1, -1, i32::MAX,
45158            i32::MIN, 100, -100, -32,
45159        );
45160        let r = _mm256_maskz_abs_epi32(0, a);
45161        assert_eq_m256i(r, _mm256_setzero_si256());
45162        let r = _mm256_maskz_abs_epi32(0b00001111, a);
45163        #[rustfmt::skip]
45164        let e = _mm256_setr_epi32(
45165            0, 1, 1, i32::MAX,
45166            0, 0, 0, 0,
45167        );
45168        assert_eq_m256i(r, e);
45169    }
45170
45171    #[simd_test(enable = "avx512f,avx512vl")]
45172    const fn test_mm_mask_abs_epi32() {
45173        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45174        let r = _mm_mask_abs_epi32(a, 0, a);
45175        assert_eq_m128i(r, a);
45176        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
45177        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45178        assert_eq_m128i(r, e);
45179    }
45180
45181    #[simd_test(enable = "avx512f,avx512vl")]
45182    const fn test_mm_maskz_abs_epi32() {
45183        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45184        let r = _mm_maskz_abs_epi32(0, a);
45185        assert_eq_m128i(r, _mm_setzero_si128());
45186        let r = _mm_maskz_abs_epi32(0b00001111, a);
45187        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45188        assert_eq_m128i(r, e);
45189    }
45190
45191    #[simd_test(enable = "avx512f")]
45192    const fn test_mm512_abs_ps() {
45193        #[rustfmt::skip]
45194        let a = _mm512_setr_ps(
45195            0., 1., -1., f32::MAX,
45196            f32::MIN, 100., -100., -32.,
45197            0., 1., -1., f32::MAX,
45198            f32::MIN, 100., -100., -32.,
45199        );
45200        let r = _mm512_abs_ps(a);
45201        #[rustfmt::skip]
45202        let e = _mm512_setr_ps(
45203            0., 1., 1., f32::MAX,
45204            f32::MAX, 100., 100., 32.,
45205            0., 1., 1., f32::MAX,
45206            f32::MAX, 100., 100., 32.,
45207        );
45208        assert_eq_m512(r, e);
45209    }
45210
45211    #[simd_test(enable = "avx512f")]
45212    const fn test_mm512_mask_abs_ps() {
45213        #[rustfmt::skip]
45214        let a = _mm512_setr_ps(
45215            0., 1., -1., f32::MAX,
45216            f32::MIN, 100., -100., -32.,
45217            0., 1., -1., f32::MAX,
45218            f32::MIN, 100., -100., -32.,
45219        );
45220        let r = _mm512_mask_abs_ps(a, 0, a);
45221        assert_eq_m512(r, a);
45222        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
45223        #[rustfmt::skip]
45224        let e = _mm512_setr_ps(
45225            0., 1., 1., f32::MAX,
45226            f32::MAX, 100., 100., 32.,
45227            0., 1., -1., f32::MAX,
45228            f32::MIN, 100., -100., -32.,
45229        );
45230        assert_eq_m512(r, e);
45231    }
45232
45233    #[simd_test(enable = "avx512f")]
45234    const fn test_mm512_mask_mov_epi32() {
45235        let src = _mm512_set1_epi32(1);
45236        let a = _mm512_set1_epi32(2);
45237        let r = _mm512_mask_mov_epi32(src, 0, a);
45238        assert_eq_m512i(r, src);
45239        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
45240        assert_eq_m512i(r, a);
45241    }
45242
45243    #[simd_test(enable = "avx512f")]
45244    const fn test_mm512_maskz_mov_epi32() {
45245        let a = _mm512_set1_epi32(2);
45246        let r = _mm512_maskz_mov_epi32(0, a);
45247        assert_eq_m512i(r, _mm512_setzero_si512());
45248        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
45249        assert_eq_m512i(r, a);
45250    }
45251
45252    #[simd_test(enable = "avx512f,avx512vl")]
45253    const fn test_mm256_mask_mov_epi32() {
45254        let src = _mm256_set1_epi32(1);
45255        let a = _mm256_set1_epi32(2);
45256        let r = _mm256_mask_mov_epi32(src, 0, a);
45257        assert_eq_m256i(r, src);
45258        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
45259        assert_eq_m256i(r, a);
45260    }
45261
45262    #[simd_test(enable = "avx512f,avx512vl")]
45263    const fn test_mm256_maskz_mov_epi32() {
45264        let a = _mm256_set1_epi32(2);
45265        let r = _mm256_maskz_mov_epi32(0, a);
45266        assert_eq_m256i(r, _mm256_setzero_si256());
45267        let r = _mm256_maskz_mov_epi32(0b11111111, a);
45268        assert_eq_m256i(r, a);
45269    }
45270
45271    #[simd_test(enable = "avx512f,avx512vl")]
45272    const fn test_mm_mask_mov_epi32() {
45273        let src = _mm_set1_epi32(1);
45274        let a = _mm_set1_epi32(2);
45275        let r = _mm_mask_mov_epi32(src, 0, a);
45276        assert_eq_m128i(r, src);
45277        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
45278        assert_eq_m128i(r, a);
45279    }
45280
45281    #[simd_test(enable = "avx512f,avx512vl")]
45282    const fn test_mm_maskz_mov_epi32() {
45283        let a = _mm_set1_epi32(2);
45284        let r = _mm_maskz_mov_epi32(0, a);
45285        assert_eq_m128i(r, _mm_setzero_si128());
45286        let r = _mm_maskz_mov_epi32(0b00001111, a);
45287        assert_eq_m128i(r, a);
45288    }
45289
45290    #[simd_test(enable = "avx512f")]
45291    const fn test_mm512_mask_mov_ps() {
45292        let src = _mm512_set1_ps(1.);
45293        let a = _mm512_set1_ps(2.);
45294        let r = _mm512_mask_mov_ps(src, 0, a);
45295        assert_eq_m512(r, src);
45296        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
45297        assert_eq_m512(r, a);
45298    }
45299
45300    #[simd_test(enable = "avx512f")]
45301    const fn test_mm512_maskz_mov_ps() {
45302        let a = _mm512_set1_ps(2.);
45303        let r = _mm512_maskz_mov_ps(0, a);
45304        assert_eq_m512(r, _mm512_setzero_ps());
45305        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
45306        assert_eq_m512(r, a);
45307    }
45308
45309    #[simd_test(enable = "avx512f,avx512vl")]
45310    const fn test_mm256_mask_mov_ps() {
45311        let src = _mm256_set1_ps(1.);
45312        let a = _mm256_set1_ps(2.);
45313        let r = _mm256_mask_mov_ps(src, 0, a);
45314        assert_eq_m256(r, src);
45315        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
45316        assert_eq_m256(r, a);
45317    }
45318
45319    #[simd_test(enable = "avx512f,avx512vl")]
45320    const fn test_mm256_maskz_mov_ps() {
45321        let a = _mm256_set1_ps(2.);
45322        let r = _mm256_maskz_mov_ps(0, a);
45323        assert_eq_m256(r, _mm256_setzero_ps());
45324        let r = _mm256_maskz_mov_ps(0b11111111, a);
45325        assert_eq_m256(r, a);
45326    }
45327
45328    #[simd_test(enable = "avx512f,avx512vl")]
45329    const fn test_mm_mask_mov_ps() {
45330        let src = _mm_set1_ps(1.);
45331        let a = _mm_set1_ps(2.);
45332        let r = _mm_mask_mov_ps(src, 0, a);
45333        assert_eq_m128(r, src);
45334        let r = _mm_mask_mov_ps(src, 0b00001111, a);
45335        assert_eq_m128(r, a);
45336    }
45337
45338    #[simd_test(enable = "avx512f,avx512vl")]
45339    const fn test_mm_maskz_mov_ps() {
45340        let a = _mm_set1_ps(2.);
45341        let r = _mm_maskz_mov_ps(0, a);
45342        assert_eq_m128(r, _mm_setzero_ps());
45343        let r = _mm_maskz_mov_ps(0b00001111, a);
45344        assert_eq_m128(r, a);
45345    }
45346
45347    #[simd_test(enable = "avx512f")]
45348    const fn test_mm512_add_epi32() {
45349        #[rustfmt::skip]
45350        let a = _mm512_setr_epi32(
45351            0, 1, -1, i32::MAX,
45352            i32::MIN, 100, -100, -32,
45353            0, 1, -1, i32::MAX,
45354            i32::MIN, 100, -100, -32,
45355        );
45356        let b = _mm512_set1_epi32(1);
45357        let r = _mm512_add_epi32(a, b);
45358        #[rustfmt::skip]
45359        let e = _mm512_setr_epi32(
45360            1, 2, 0, i32::MIN,
45361            i32::MIN + 1, 101, -99, -31,
45362            1, 2, 0, i32::MIN,
45363            i32::MIN + 1, 101, -99, -31,
45364        );
45365        assert_eq_m512i(r, e);
45366    }
45367
45368    #[simd_test(enable = "avx512f")]
45369    const fn test_mm512_mask_add_epi32() {
45370        #[rustfmt::skip]
45371        let a = _mm512_setr_epi32(
45372            0, 1, -1, i32::MAX,
45373            i32::MIN, 100, -100, -32,
45374            0, 1, -1, i32::MAX,
45375            i32::MIN, 100, -100, -32,
45376        );
45377        let b = _mm512_set1_epi32(1);
45378        let r = _mm512_mask_add_epi32(a, 0, a, b);
45379        assert_eq_m512i(r, a);
45380        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
45381        #[rustfmt::skip]
45382        let e = _mm512_setr_epi32(
45383            1, 2, 0, i32::MIN,
45384            i32::MIN + 1, 101, -99, -31,
45385            0, 1, -1, i32::MAX,
45386            i32::MIN, 100, -100, -32,
45387        );
45388        assert_eq_m512i(r, e);
45389    }
45390
45391    #[simd_test(enable = "avx512f")]
45392    const fn test_mm512_maskz_add_epi32() {
45393        #[rustfmt::skip]
45394        let a = _mm512_setr_epi32(
45395            0, 1, -1, i32::MAX,
45396            i32::MIN, 100, -100, -32,
45397            0, 1, -1, i32::MAX,
45398            i32::MIN, 100, -100, -32,
45399        );
45400        let b = _mm512_set1_epi32(1);
45401        let r = _mm512_maskz_add_epi32(0, a, b);
45402        assert_eq_m512i(r, _mm512_setzero_si512());
45403        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
45404        #[rustfmt::skip]
45405        let e = _mm512_setr_epi32(
45406            1, 2, 0, i32::MIN,
45407            i32::MIN + 1, 101, -99, -31,
45408            0, 0, 0, 0,
45409            0, 0, 0, 0,
45410        );
45411        assert_eq_m512i(r, e);
45412    }
45413
45414    #[simd_test(enable = "avx512f,avx512vl")]
45415    const fn test_mm256_mask_add_epi32() {
45416        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45417        let b = _mm256_set1_epi32(1);
45418        let r = _mm256_mask_add_epi32(a, 0, a, b);
45419        assert_eq_m256i(r, a);
45420        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
45421        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45422        assert_eq_m256i(r, e);
45423    }
45424
45425    #[simd_test(enable = "avx512f,avx512vl")]
45426    const fn test_mm256_maskz_add_epi32() {
45427        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45428        let b = _mm256_set1_epi32(1);
45429        let r = _mm256_maskz_add_epi32(0, a, b);
45430        assert_eq_m256i(r, _mm256_setzero_si256());
45431        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
45432        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45433        assert_eq_m256i(r, e);
45434    }
45435
45436    #[simd_test(enable = "avx512f,avx512vl")]
45437    const fn test_mm_mask_add_epi32() {
45438        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45439        let b = _mm_set1_epi32(1);
45440        let r = _mm_mask_add_epi32(a, 0, a, b);
45441        assert_eq_m128i(r, a);
45442        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
45443        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
45444        assert_eq_m128i(r, e);
45445    }
45446
45447    #[simd_test(enable = "avx512f,avx512vl")]
45448    const fn test_mm_maskz_add_epi32() {
45449        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
45450        let b = _mm_set1_epi32(1);
45451        let r = _mm_maskz_add_epi32(0, a, b);
45452        assert_eq_m128i(r, _mm_setzero_si128());
45453        let r = _mm_maskz_add_epi32(0b00001111, a, b);
45454        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
45455        assert_eq_m128i(r, e);
45456    }
45457
45458    #[simd_test(enable = "avx512f")]
45459    const fn test_mm512_add_ps() {
45460        #[rustfmt::skip]
45461        let a = _mm512_setr_ps(
45462            0., 1., -1., f32::MAX,
45463            f32::MIN, 100., -100., -32.,
45464            0., 1., -1., f32::MAX,
45465            f32::MIN, 100., -100., -32.,
45466        );
45467        let b = _mm512_set1_ps(1.);
45468        let r = _mm512_add_ps(a, b);
45469        #[rustfmt::skip]
45470        let e = _mm512_setr_ps(
45471            1., 2., 0., f32::MAX,
45472            f32::MIN + 1., 101., -99., -31.,
45473            1., 2., 0., f32::MAX,
45474            f32::MIN + 1., 101., -99., -31.,
45475        );
45476        assert_eq_m512(r, e);
45477    }
45478
45479    #[simd_test(enable = "avx512f")]
45480    const fn test_mm512_mask_add_ps() {
45481        #[rustfmt::skip]
45482        let a = _mm512_setr_ps(
45483            0., 1., -1., f32::MAX,
45484            f32::MIN, 100., -100., -32.,
45485            0., 1., -1., f32::MAX,
45486            f32::MIN, 100., -100., -32.,
45487        );
45488        let b = _mm512_set1_ps(1.);
45489        let r = _mm512_mask_add_ps(a, 0, a, b);
45490        assert_eq_m512(r, a);
45491        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
45492        #[rustfmt::skip]
45493        let e = _mm512_setr_ps(
45494            1., 2., 0., f32::MAX,
45495            f32::MIN + 1., 101., -99., -31.,
45496            0., 1., -1., f32::MAX,
45497            f32::MIN, 100., -100., -32.,
45498        );
45499        assert_eq_m512(r, e);
45500    }
45501
45502    #[simd_test(enable = "avx512f")]
45503    const fn test_mm512_maskz_add_ps() {
45504        #[rustfmt::skip]
45505        let a = _mm512_setr_ps(
45506            0., 1., -1., f32::MAX,
45507            f32::MIN, 100., -100., -32.,
45508            0., 1., -1., f32::MAX,
45509            f32::MIN, 100., -100., -32.,
45510        );
45511        let b = _mm512_set1_ps(1.);
45512        let r = _mm512_maskz_add_ps(0, a, b);
45513        assert_eq_m512(r, _mm512_setzero_ps());
45514        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
45515        #[rustfmt::skip]
45516        let e = _mm512_setr_ps(
45517            1., 2., 0., f32::MAX,
45518            f32::MIN + 1., 101., -99., -31.,
45519            0., 0., 0., 0.,
45520            0., 0., 0., 0.,
45521        );
45522        assert_eq_m512(r, e);
45523    }
45524
45525    #[simd_test(enable = "avx512f,avx512vl")]
45526    const fn test_mm256_mask_add_ps() {
45527        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45528        let b = _mm256_set1_ps(1.);
45529        let r = _mm256_mask_add_ps(a, 0, a, b);
45530        assert_eq_m256(r, a);
45531        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
45532        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
45533        assert_eq_m256(r, e);
45534    }
45535
45536    #[simd_test(enable = "avx512f,avx512vl")]
45537    const fn test_mm256_maskz_add_ps() {
45538        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45539        let b = _mm256_set1_ps(1.);
45540        let r = _mm256_maskz_add_ps(0, a, b);
45541        assert_eq_m256(r, _mm256_setzero_ps());
45542        let r = _mm256_maskz_add_ps(0b11111111, a, b);
45543        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
45544        assert_eq_m256(r, e);
45545    }
45546
45547    #[simd_test(enable = "avx512f,avx512vl")]
45548    const fn test_mm_mask_add_ps() {
45549        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45550        let b = _mm_set1_ps(1.);
45551        let r = _mm_mask_add_ps(a, 0, a, b);
45552        assert_eq_m128(r, a);
45553        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
45554        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
45555        assert_eq_m128(r, e);
45556    }
45557
45558    #[simd_test(enable = "avx512f,avx512vl")]
45559    const fn test_mm_maskz_add_ps() {
45560        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45561        let b = _mm_set1_ps(1.);
45562        let r = _mm_maskz_add_ps(0, a, b);
45563        assert_eq_m128(r, _mm_setzero_ps());
45564        let r = _mm_maskz_add_ps(0b00001111, a, b);
45565        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
45566        assert_eq_m128(r, e);
45567    }
45568
    // Unmasked lane-wise subtraction with wrapping semantics:
    // i32::MIN - 1 wraps to i32::MAX (see lanes 4 and 12 below).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_sub_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
        );
        assert_eq_m512i(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with only the low
    // eight mask bits set, lanes 0-7 hold a - b and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_sub_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with only the
    // low eight mask bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_sub_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45635
45636    #[simd_test(enable = "avx512f,avx512vl")]
45637    const fn test_mm256_mask_sub_epi32() {
45638        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45639        let b = _mm256_set1_epi32(1);
45640        let r = _mm256_mask_sub_epi32(a, 0, a, b);
45641        assert_eq_m256i(r, a);
45642        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
45643        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
45644        assert_eq_m256i(r, e);
45645    }
45646
45647    #[simd_test(enable = "avx512f,avx512vl")]
45648    const fn test_mm256_maskz_sub_epi32() {
45649        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45650        let b = _mm256_set1_epi32(1);
45651        let r = _mm256_maskz_sub_epi32(0, a, b);
45652        assert_eq_m256i(r, _mm256_setzero_si256());
45653        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
45654        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
45655        assert_eq_m256i(r, e);
45656    }
45657
45658    #[simd_test(enable = "avx512f,avx512vl")]
45659    const fn test_mm_mask_sub_epi32() {
45660        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45661        let b = _mm_set1_epi32(1);
45662        let r = _mm_mask_sub_epi32(a, 0, a, b);
45663        assert_eq_m128i(r, a);
45664        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
45665        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
45666        assert_eq_m128i(r, e);
45667    }
45668
45669    #[simd_test(enable = "avx512f,avx512vl")]
45670    const fn test_mm_maskz_sub_epi32() {
45671        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45672        let b = _mm_set1_epi32(1);
45673        let r = _mm_maskz_sub_epi32(0, a, b);
45674        assert_eq_m128i(r, _mm_setzero_si128());
45675        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
45676        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
45677        assert_eq_m128i(r, e);
45678    }
45679
    // Unmasked lane-wise float subtraction; at f32::MIN's magnitude subtracting
    // 1.0 rounds back to f32::MIN (lanes 4 and 12 below).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with only the low
    // eight mask bits set, lanes 0-7 hold a - b and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with only the
    // low eight mask bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_sub_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
45746
45747    #[simd_test(enable = "avx512f,avx512vl")]
45748    const fn test_mm256_mask_sub_ps() {
45749        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45750        let b = _mm256_set1_ps(1.);
45751        let r = _mm256_mask_sub_ps(a, 0, a, b);
45752        assert_eq_m256(r, a);
45753        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
45754        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
45755        assert_eq_m256(r, e);
45756    }
45757
45758    #[simd_test(enable = "avx512f,avx512vl")]
45759    const fn test_mm256_maskz_sub_ps() {
45760        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45761        let b = _mm256_set1_ps(1.);
45762        let r = _mm256_maskz_sub_ps(0, a, b);
45763        assert_eq_m256(r, _mm256_setzero_ps());
45764        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
45765        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
45766        assert_eq_m256(r, e);
45767    }
45768
45769    #[simd_test(enable = "avx512f,avx512vl")]
45770    const fn test_mm_mask_sub_ps() {
45771        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45772        let b = _mm_set1_ps(1.);
45773        let r = _mm_mask_sub_ps(a, 0, a, b);
45774        assert_eq_m128(r, a);
45775        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
45776        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
45777        assert_eq_m128(r, e);
45778    }
45779
45780    #[simd_test(enable = "avx512f,avx512vl")]
45781    const fn test_mm_maskz_sub_ps() {
45782        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45783        let b = _mm_set1_ps(1.);
45784        let r = _mm_maskz_sub_ps(0, a, b);
45785        assert_eq_m128(r, _mm_setzero_ps());
45786        let r = _mm_maskz_sub_ps(0b00001111, a, b);
45787        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
45788        assert_eq_m128(r, e);
45789    }
45790
    // Unmasked low-half multiply: each lane keeps the low 32 bits of a * b, so
    // i32::MAX * 2 wraps to -2 and i32::MIN * 2 wraps to 0 (lanes 3/4 and 11/12).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mullo_epi32(a, b);
        let e = _mm512_setr_epi32(
            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
        );
        assert_eq_m512i(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with only the low
    // eight mask bits set, lanes 0-7 hold the product and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 2, -2, -2,
            0, 200, -200, -64,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with only the
    // low eight mask bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_mullo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
45847
45848    #[simd_test(enable = "avx512f,avx512vl")]
45849    const fn test_mm256_mask_mullo_epi32() {
45850        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45851        let b = _mm256_set1_epi32(2);
45852        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
45853        assert_eq_m256i(r, a);
45854        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
45855        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
45856        assert_eq_m256i(r, e);
45857    }
45858
45859    #[simd_test(enable = "avx512f,avx512vl")]
45860    const fn test_mm256_maskz_mullo_epi32() {
45861        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45862        let b = _mm256_set1_epi32(2);
45863        let r = _mm256_maskz_mullo_epi32(0, a, b);
45864        assert_eq_m256i(r, _mm256_setzero_si256());
45865        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
45866        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
45867        assert_eq_m256i(r, e);
45868    }
45869
45870    #[simd_test(enable = "avx512f,avx512vl")]
45871    const fn test_mm_mask_mullo_epi32() {
45872        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45873        let b = _mm_set1_epi32(2);
45874        let r = _mm_mask_mullo_epi32(a, 0, a, b);
45875        assert_eq_m128i(r, a);
45876        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
45877        let e = _mm_set_epi32(2, -2, -2, 0);
45878        assert_eq_m128i(r, e);
45879    }
45880
45881    #[simd_test(enable = "avx512f,avx512vl")]
45882    const fn test_mm_maskz_mullo_epi32() {
45883        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45884        let b = _mm_set1_epi32(2);
45885        let r = _mm_maskz_mullo_epi32(0, a, b);
45886        assert_eq_m128i(r, _mm_setzero_si128());
45887        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
45888        let e = _mm_set_epi32(2, -2, -2, 0);
45889        assert_eq_m128i(r, e);
45890    }
45891
    // Unmasked lane-wise float multiply; f32::MAX * 2. overflows to +inf and
    // f32::MIN * 2. to -inf (lanes 3/4 and 11/12).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mul_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200.,
            -64.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with only the low
    // eight mask bits set, lanes 0-7 hold the product and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_mul_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with only the
    // low eight mask bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_maskz_mul_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
45959
45960    #[simd_test(enable = "avx512f,avx512vl")]
45961    const fn test_mm256_mask_mul_ps() {
45962        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45963        let b = _mm256_set1_ps(2.);
45964        let r = _mm256_mask_mul_ps(a, 0, a, b);
45965        assert_eq_m256(r, a);
45966        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
45967        #[rustfmt::skip]
45968        let e = _mm256_set_ps(
45969            0., 2., -2., f32::INFINITY,
45970            f32::NEG_INFINITY, 200., -200., -64.,
45971        );
45972        assert_eq_m256(r, e);
45973    }
45974
45975    #[simd_test(enable = "avx512f,avx512vl")]
45976    const fn test_mm256_maskz_mul_ps() {
45977        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45978        let b = _mm256_set1_ps(2.);
45979        let r = _mm256_maskz_mul_ps(0, a, b);
45980        assert_eq_m256(r, _mm256_setzero_ps());
45981        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
45982        #[rustfmt::skip]
45983        let e = _mm256_set_ps(
45984            0., 2., -2., f32::INFINITY,
45985            f32::NEG_INFINITY, 200., -200., -64.,
45986        );
45987        assert_eq_m256(r, e);
45988    }
45989
45990    #[simd_test(enable = "avx512f,avx512vl")]
45991    const fn test_mm_mask_mul_ps() {
45992        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45993        let b = _mm_set1_ps(2.);
45994        let r = _mm_mask_mul_ps(a, 0, a, b);
45995        assert_eq_m128(r, a);
45996        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
45997        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
45998        assert_eq_m128(r, e);
45999    }
46000
46001    #[simd_test(enable = "avx512f,avx512vl")]
46002    const fn test_mm_maskz_mul_ps() {
46003        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
46004        let b = _mm_set1_ps(2.);
46005        let r = _mm_maskz_mul_ps(0, a, b);
46006        assert_eq_m128(r, _mm_setzero_ps());
46007        let r = _mm_maskz_mul_ps(0b00001111, a, b);
46008        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
46009        assert_eq_m128(r, e);
46010    }
46011
    // Unmasked lane-wise float division. Divisor lanes 5 and 12 are zero, so
    // 100./0. yields +inf and -131./0. yields -inf; no lane divides 0 by 0,
    // so no NaN is produced.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_div_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0.5, -0.5, 500.,
            f32::NEG_INFINITY, 50., -50., -16.,
        );
        assert_eq_m512(r, e); // 100./0. = +inf (lane 5), -131./0. = -inf (lane 12)
    }

    // Writemask variant: a zero mask returns `src` unchanged; with only the low
    // eight mask bits set, lanes 0-7 hold a / b and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_mask_div_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 1., -1., 1000.,
            -131., 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with only the
    // low eight mask bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_maskz_div_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
46072
46073    #[simd_test(enable = "avx512f,avx512vl")]
46074    const fn test_mm256_mask_div_ps() {
46075        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
46076        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
46077        let r = _mm256_mask_div_ps(a, 0, a, b);
46078        assert_eq_m256(r, a);
46079        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
46080        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
46081        assert_eq_m256(r, e);
46082    }
46083
46084    #[simd_test(enable = "avx512f,avx512vl")]
46085    const fn test_mm256_maskz_div_ps() {
46086        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
46087        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
46088        let r = _mm256_maskz_div_ps(0, a, b);
46089        assert_eq_m256(r, _mm256_setzero_ps());
46090        let r = _mm256_maskz_div_ps(0b11111111, a, b);
46091        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
46092        assert_eq_m256(r, e);
46093    }
46094
46095    #[simd_test(enable = "avx512f,avx512vl")]
46096    const fn test_mm_mask_div_ps() {
46097        let a = _mm_set_ps(100., 100., -100., -32.);
46098        let b = _mm_set_ps(2., 0., 2., 2.);
46099        let r = _mm_mask_div_ps(a, 0, a, b);
46100        assert_eq_m128(r, a);
46101        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
46102        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
46103        assert_eq_m128(r, e);
46104    }
46105
46106    #[simd_test(enable = "avx512f,avx512vl")]
46107    const fn test_mm_maskz_div_ps() {
46108        let a = _mm_set_ps(100., 100., -100., -32.);
46109        let b = _mm_set_ps(2., 0., 2., 2.);
46110        let r = _mm_maskz_div_ps(0, a, b);
46111        assert_eq_m128(r, _mm_setzero_ps());
46112        let r = _mm_maskz_div_ps(0b00001111, a, b);
46113        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
46114        assert_eq_m128(r, e);
46115    }
46116
    // Unmasked signed lane-wise maximum of ascending vs. descending lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with the low eight
    // bits set, lanes 0-7 hold max(a, b) and lanes 8-15 are copied from `src`
    // (which here equals max(a, b) for those lanes as well).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with the low
    // eight bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
46147
46148    #[simd_test(enable = "avx512f,avx512vl")]
46149    const fn test_mm256_mask_max_epi32() {
46150        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46151        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46152        let r = _mm256_mask_max_epi32(a, 0, a, b);
46153        assert_eq_m256i(r, a);
46154        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
46155        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
46156        assert_eq_m256i(r, e);
46157    }
46158
46159    #[simd_test(enable = "avx512f,avx512vl")]
46160    const fn test_mm256_maskz_max_epi32() {
46161        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46162        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46163        let r = _mm256_maskz_max_epi32(0, a, b);
46164        assert_eq_m256i(r, _mm256_setzero_si256());
46165        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
46166        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
46167        assert_eq_m256i(r, e);
46168    }
46169
46170    #[simd_test(enable = "avx512f,avx512vl")]
46171    const fn test_mm_mask_max_epi32() {
46172        let a = _mm_set_epi32(0, 1, 2, 3);
46173        let b = _mm_set_epi32(3, 2, 1, 0);
46174        let r = _mm_mask_max_epi32(a, 0, a, b);
46175        assert_eq_m128i(r, a);
46176        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
46177        let e = _mm_set_epi32(3, 2, 2, 3);
46178        assert_eq_m128i(r, e);
46179    }
46180
46181    #[simd_test(enable = "avx512f,avx512vl")]
46182    const fn test_mm_maskz_max_epi32() {
46183        let a = _mm_set_epi32(0, 1, 2, 3);
46184        let b = _mm_set_epi32(3, 2, 1, 0);
46185        let r = _mm_maskz_max_epi32(0, a, b);
46186        assert_eq_m128i(r, _mm_setzero_si128());
46187        let r = _mm_maskz_max_epi32(0b00001111, a, b);
46188        let e = _mm_set_epi32(3, 2, 2, 3);
46189        assert_eq_m128i(r, e);
46190    }
46191
    // Unmasked lane-wise float maximum. NOTE(review): unlike the integer tests
    // above these are plain `fn`, not `const fn` — presumably the float max
    // intrinsic is not const-callable; confirm against the intrinsic definition.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_ps(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with the low eight
    // bits set, lanes 0-7 hold max(a, b) and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with the low
    // eight bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
46240
46241    #[simd_test(enable = "avx512f,avx512vl")]
46242    fn test_mm256_mask_max_ps() {
46243        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46244        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
46245        let r = _mm256_mask_max_ps(a, 0, a, b);
46246        assert_eq_m256(r, a);
46247        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
46248        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
46249        assert_eq_m256(r, e);
46250    }
46251
46252    #[simd_test(enable = "avx512f,avx512vl")]
46253    fn test_mm256_maskz_max_ps() {
46254        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46255        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
46256        let r = _mm256_maskz_max_ps(0, a, b);
46257        assert_eq_m256(r, _mm256_setzero_ps());
46258        let r = _mm256_maskz_max_ps(0b11111111, a, b);
46259        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
46260        assert_eq_m256(r, e);
46261    }
46262
46263    #[simd_test(enable = "avx512f,avx512vl")]
46264    fn test_mm_mask_max_ps() {
46265        let a = _mm_set_ps(0., 1., 2., 3.);
46266        let b = _mm_set_ps(3., 2., 1., 0.);
46267        let r = _mm_mask_max_ps(a, 0, a, b);
46268        assert_eq_m128(r, a);
46269        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
46270        let e = _mm_set_ps(3., 2., 2., 3.);
46271        assert_eq_m128(r, e);
46272    }
46273
46274    #[simd_test(enable = "avx512f,avx512vl")]
46275    fn test_mm_maskz_max_ps() {
46276        let a = _mm_set_ps(0., 1., 2., 3.);
46277        let b = _mm_set_ps(3., 2., 1., 0.);
46278        let r = _mm_maskz_max_ps(0, a, b);
46279        assert_eq_m128(r, _mm_setzero_ps());
46280        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
46281        let e = _mm_set_ps(3., 2., 2., 3.);
46282        assert_eq_m128(r, e);
46283    }
46284
    // Unmasked unsigned lane-wise maximum of ascending vs. descending lanes
    // (all values non-negative, so signed and unsigned max agree here).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: a zero mask returns `src` unchanged; with the low eight
    // bits set, lanes 0-7 hold max(a, b) and lanes 8-15 are copied from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: a zero mask returns an all-zero vector; with the low
    // eight bits set, lanes 8-15 are zeroed instead of copied.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
46315
46316    #[simd_test(enable = "avx512f,avx512vl")]
46317    const fn test_mm256_mask_max_epu32() {
46318        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46319        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46320        let r = _mm256_mask_max_epu32(a, 0, a, b);
46321        assert_eq_m256i(r, a);
46322        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
46323        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
46324        assert_eq_m256i(r, e);
46325    }
46326
46327    #[simd_test(enable = "avx512f,avx512vl")]
46328    const fn test_mm256_maskz_max_epu32() {
46329        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46330        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46331        let r = _mm256_maskz_max_epu32(0, a, b);
46332        assert_eq_m256i(r, _mm256_setzero_si256());
46333        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
46334        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
46335        assert_eq_m256i(r, e);
46336    }
46337
46338    #[simd_test(enable = "avx512f,avx512vl")]
46339    const fn test_mm_mask_max_epu32() {
46340        let a = _mm_set_epi32(0, 1, 2, 3);
46341        let b = _mm_set_epi32(3, 2, 1, 0);
46342        let r = _mm_mask_max_epu32(a, 0, a, b);
46343        assert_eq_m128i(r, a);
46344        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
46345        let e = _mm_set_epi32(3, 2, 2, 3);
46346        assert_eq_m128i(r, e);
46347    }
46348
46349    #[simd_test(enable = "avx512f,avx512vl")]
46350    const fn test_mm_maskz_max_epu32() {
46351        let a = _mm_set_epi32(0, 1, 2, 3);
46352        let b = _mm_set_epi32(3, 2, 1, 0);
46353        let r = _mm_maskz_max_epu32(0, a, b);
46354        assert_eq_m128i(r, _mm_setzero_si128());
46355        let r = _mm_maskz_max_epu32(0b00001111, a, b);
46356        let e = _mm_set_epi32(3, 2, 2, 3);
46357        assert_eq_m128i(r, e);
46358    }
46359
46360    #[simd_test(enable = "avx512f")]
46361    const fn test_mm512_min_epi32() {
46362        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46363        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
46364        let r = _mm512_min_epi32(a, b);
46365        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
46366        assert_eq_m512i(r, e);
46367    }
46368
46369    #[simd_test(enable = "avx512f")]
46370    const fn test_mm512_mask_min_epi32() {
46371        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46372        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
46373        let r = _mm512_mask_min_epi32(a, 0, a, b);
46374        assert_eq_m512i(r, a);
46375        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
46376        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46377        assert_eq_m512i(r, e);
46378    }
46379
46380    #[simd_test(enable = "avx512f")]
46381    const fn test_mm512_maskz_min_epi32() {
46382        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46383        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
46384        let r = _mm512_maskz_min_epi32(0, a, b);
46385        assert_eq_m512i(r, _mm512_setzero_si512());
46386        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
46387        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
46388        assert_eq_m512i(r, e);
46389    }
46390
46391    #[simd_test(enable = "avx512f,avx512vl")]
46392    const fn test_mm256_mask_min_epi32() {
46393        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46394        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46395        let r = _mm256_mask_min_epi32(a, 0, a, b);
46396        assert_eq_m256i(r, a);
46397        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
46398        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
46399        assert_eq_m256i(r, e);
46400    }
46401
46402    #[simd_test(enable = "avx512f,avx512vl")]
46403    const fn test_mm256_maskz_min_epi32() {
46404        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46405        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46406        let r = _mm256_maskz_min_epi32(0, a, b);
46407        assert_eq_m256i(r, _mm256_setzero_si256());
46408        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
46409        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
46410        assert_eq_m256i(r, e);
46411    }
46412
46413    #[simd_test(enable = "avx512f,avx512vl")]
46414    const fn test_mm_mask_min_epi32() {
46415        let a = _mm_set_epi32(0, 1, 2, 3);
46416        let b = _mm_set_epi32(3, 2, 1, 0);
46417        let r = _mm_mask_min_epi32(a, 0, a, b);
46418        assert_eq_m128i(r, a);
46419        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
46420        let e = _mm_set_epi32(0, 1, 1, 0);
46421        assert_eq_m128i(r, e);
46422    }
46423
46424    #[simd_test(enable = "avx512f,avx512vl")]
46425    const fn test_mm_maskz_min_epi32() {
46426        let a = _mm_set_epi32(0, 1, 2, 3);
46427        let b = _mm_set_epi32(3, 2, 1, 0);
46428        let r = _mm_maskz_min_epi32(0, a, b);
46429        assert_eq_m128i(r, _mm_setzero_si128());
46430        let r = _mm_maskz_min_epi32(0b00001111, a, b);
46431        let e = _mm_set_epi32(0, 1, 1, 0);
46432        assert_eq_m128i(r, e);
46433    }
46434
46435    #[simd_test(enable = "avx512f")]
46436    fn test_mm512_min_ps() {
46437        let a = _mm512_setr_ps(
46438            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46439        );
46440        let b = _mm512_setr_ps(
46441            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
46442        );
46443        let r = _mm512_min_ps(a, b);
46444        let e = _mm512_setr_ps(
46445            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
46446        );
46447        assert_eq_m512(r, e);
46448    }
46449
46450    #[simd_test(enable = "avx512f")]
46451    fn test_mm512_mask_min_ps() {
46452        let a = _mm512_setr_ps(
46453            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46454        );
46455        let b = _mm512_setr_ps(
46456            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
46457        );
46458        let r = _mm512_mask_min_ps(a, 0, a, b);
46459        assert_eq_m512(r, a);
46460        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
46461        let e = _mm512_setr_ps(
46462            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46463        );
46464        assert_eq_m512(r, e);
46465    }
46466
46467    #[simd_test(enable = "avx512f")]
46468    fn test_mm512_maskz_min_ps() {
46469        let a = _mm512_setr_ps(
46470            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46471        );
46472        let b = _mm512_setr_ps(
46473            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
46474        );
46475        let r = _mm512_maskz_min_ps(0, a, b);
46476        assert_eq_m512(r, _mm512_setzero_ps());
46477        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
46478        let e = _mm512_setr_ps(
46479            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
46480        );
46481        assert_eq_m512(r, e);
46482    }
46483
46484    #[simd_test(enable = "avx512f,avx512vl")]
46485    fn test_mm256_mask_min_ps() {
46486        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46487        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
46488        let r = _mm256_mask_min_ps(a, 0, a, b);
46489        assert_eq_m256(r, a);
46490        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
46491        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
46492        assert_eq_m256(r, e);
46493    }
46494
46495    #[simd_test(enable = "avx512f,avx512vl")]
46496    fn test_mm256_maskz_min_ps() {
46497        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46498        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
46499        let r = _mm256_maskz_min_ps(0, a, b);
46500        assert_eq_m256(r, _mm256_setzero_ps());
46501        let r = _mm256_maskz_min_ps(0b11111111, a, b);
46502        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
46503        assert_eq_m256(r, e);
46504    }
46505
46506    #[simd_test(enable = "avx512f,avx512vl")]
46507    fn test_mm_mask_min_ps() {
46508        let a = _mm_set_ps(0., 1., 2., 3.);
46509        let b = _mm_set_ps(3., 2., 1., 0.);
46510        let r = _mm_mask_min_ps(a, 0, a, b);
46511        assert_eq_m128(r, a);
46512        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
46513        let e = _mm_set_ps(0., 1., 1., 0.);
46514        assert_eq_m128(r, e);
46515    }
46516
46517    #[simd_test(enable = "avx512f,avx512vl")]
46518    fn test_mm_maskz_min_ps() {
46519        let a = _mm_set_ps(0., 1., 2., 3.);
46520        let b = _mm_set_ps(3., 2., 1., 0.);
46521        let r = _mm_maskz_min_ps(0, a, b);
46522        assert_eq_m128(r, _mm_setzero_ps());
46523        let r = _mm_maskz_min_ps(0b00001111, a, b);
46524        let e = _mm_set_ps(0., 1., 1., 0.);
46525        assert_eq_m128(r, e);
46526    }
46527
46528    #[simd_test(enable = "avx512f")]
46529    const fn test_mm512_min_epu32() {
46530        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46531        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
46532        let r = _mm512_min_epu32(a, b);
46533        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
46534        assert_eq_m512i(r, e);
46535    }
46536
46537    #[simd_test(enable = "avx512f")]
46538    const fn test_mm512_mask_min_epu32() {
46539        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46540        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
46541        let r = _mm512_mask_min_epu32(a, 0, a, b);
46542        assert_eq_m512i(r, a);
46543        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
46544        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46545        assert_eq_m512i(r, e);
46546    }
46547
46548    #[simd_test(enable = "avx512f")]
46549    const fn test_mm512_maskz_min_epu32() {
46550        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46551        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
46552        let r = _mm512_maskz_min_epu32(0, a, b);
46553        assert_eq_m512i(r, _mm512_setzero_si512());
46554        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
46555        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
46556        assert_eq_m512i(r, e);
46557    }
46558
46559    #[simd_test(enable = "avx512f,avx512vl")]
46560    const fn test_mm256_mask_min_epu32() {
46561        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46562        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46563        let r = _mm256_mask_min_epu32(a, 0, a, b);
46564        assert_eq_m256i(r, a);
46565        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
46566        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
46567        assert_eq_m256i(r, e);
46568    }
46569
46570    #[simd_test(enable = "avx512f,avx512vl")]
46571    const fn test_mm256_maskz_min_epu32() {
46572        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46573        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
46574        let r = _mm256_maskz_min_epu32(0, a, b);
46575        assert_eq_m256i(r, _mm256_setzero_si256());
46576        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
46577        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
46578        assert_eq_m256i(r, e);
46579    }
46580
46581    #[simd_test(enable = "avx512f,avx512vl")]
46582    const fn test_mm_mask_min_epu32() {
46583        let a = _mm_set_epi32(0, 1, 2, 3);
46584        let b = _mm_set_epi32(3, 2, 1, 0);
46585        let r = _mm_mask_min_epu32(a, 0, a, b);
46586        assert_eq_m128i(r, a);
46587        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
46588        let e = _mm_set_epi32(0, 1, 1, 0);
46589        assert_eq_m128i(r, e);
46590    }
46591
46592    #[simd_test(enable = "avx512f,avx512vl")]
46593    const fn test_mm_maskz_min_epu32() {
46594        let a = _mm_set_epi32(0, 1, 2, 3);
46595        let b = _mm_set_epi32(3, 2, 1, 0);
46596        let r = _mm_maskz_min_epu32(0, a, b);
46597        assert_eq_m128i(r, _mm_setzero_si128());
46598        let r = _mm_maskz_min_epu32(0b00001111, a, b);
46599        let e = _mm_set_epi32(0, 1, 1, 0);
46600        assert_eq_m128i(r, e);
46601    }
46602
46603    #[simd_test(enable = "avx512f")]
46604    fn test_mm512_sqrt_ps() {
46605        let a = _mm512_setr_ps(
46606            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
46607        );
46608        let r = _mm512_sqrt_ps(a);
46609        let e = _mm512_setr_ps(
46610            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46611        );
46612        assert_eq_m512(r, e);
46613    }
46614
46615    #[simd_test(enable = "avx512f")]
46616    fn test_mm512_mask_sqrt_ps() {
46617        let a = _mm512_setr_ps(
46618            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
46619        );
46620        let r = _mm512_mask_sqrt_ps(a, 0, a);
46621        assert_eq_m512(r, a);
46622        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
46623        let e = _mm512_setr_ps(
46624            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
46625        );
46626        assert_eq_m512(r, e);
46627    }
46628
46629    #[simd_test(enable = "avx512f")]
46630    fn test_mm512_maskz_sqrt_ps() {
46631        let a = _mm512_setr_ps(
46632            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
46633        );
46634        let r = _mm512_maskz_sqrt_ps(0, a);
46635        assert_eq_m512(r, _mm512_setzero_ps());
46636        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
46637        let e = _mm512_setr_ps(
46638            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
46639        );
46640        assert_eq_m512(r, e);
46641    }
46642
46643    #[simd_test(enable = "avx512f,avx512vl")]
46644    fn test_mm256_mask_sqrt_ps() {
46645        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
46646        let r = _mm256_mask_sqrt_ps(a, 0, a);
46647        assert_eq_m256(r, a);
46648        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
46649        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46650        assert_eq_m256(r, e);
46651    }
46652
46653    #[simd_test(enable = "avx512f,avx512vl")]
46654    fn test_mm256_maskz_sqrt_ps() {
46655        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
46656        let r = _mm256_maskz_sqrt_ps(0, a);
46657        assert_eq_m256(r, _mm256_setzero_ps());
46658        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
46659        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46660        assert_eq_m256(r, e);
46661    }
46662
46663    #[simd_test(enable = "avx512f,avx512vl")]
46664    fn test_mm_mask_sqrt_ps() {
46665        let a = _mm_set_ps(0., 1., 4., 9.);
46666        let r = _mm_mask_sqrt_ps(a, 0, a);
46667        assert_eq_m128(r, a);
46668        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
46669        let e = _mm_set_ps(0., 1., 2., 3.);
46670        assert_eq_m128(r, e);
46671    }
46672
46673    #[simd_test(enable = "avx512f,avx512vl")]
46674    fn test_mm_maskz_sqrt_ps() {
46675        let a = _mm_set_ps(0., 1., 4., 9.);
46676        let r = _mm_maskz_sqrt_ps(0, a);
46677        assert_eq_m128(r, _mm_setzero_ps());
46678        let r = _mm_maskz_sqrt_ps(0b00001111, a);
46679        let e = _mm_set_ps(0., 1., 2., 3.);
46680        assert_eq_m128(r, e);
46681    }
46682
46683    #[simd_test(enable = "avx512f")]
46684    const fn test_mm512_fmadd_ps() {
46685        let a = _mm512_set1_ps(1.);
46686        let b = _mm512_setr_ps(
46687            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46688        );
46689        let c = _mm512_set1_ps(1.);
46690        let r = _mm512_fmadd_ps(a, b, c);
46691        let e = _mm512_setr_ps(
46692            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
46693        );
46694        assert_eq_m512(r, e);
46695    }
46696
46697    #[simd_test(enable = "avx512f")]
46698    const fn test_mm512_mask_fmadd_ps() {
46699        let a = _mm512_set1_ps(1.);
46700        let b = _mm512_setr_ps(
46701            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46702        );
46703        let c = _mm512_set1_ps(1.);
46704        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
46705        assert_eq_m512(r, a);
46706        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
46707        let e = _mm512_setr_ps(
46708            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46709        );
46710        assert_eq_m512(r, e);
46711    }
46712
46713    #[simd_test(enable = "avx512f")]
46714    const fn test_mm512_maskz_fmadd_ps() {
46715        let a = _mm512_set1_ps(1.);
46716        let b = _mm512_setr_ps(
46717            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46718        );
46719        let c = _mm512_set1_ps(1.);
46720        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
46721        assert_eq_m512(r, _mm512_setzero_ps());
46722        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
46723        let e = _mm512_setr_ps(
46724            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46725        );
46726        assert_eq_m512(r, e);
46727    }
46728
46729    #[simd_test(enable = "avx512f")]
46730    const fn test_mm512_mask3_fmadd_ps() {
46731        let a = _mm512_set1_ps(1.);
46732        let b = _mm512_setr_ps(
46733            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46734        );
46735        let c = _mm512_set1_ps(2.);
46736        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
46737        assert_eq_m512(r, c);
46738        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
46739        let e = _mm512_setr_ps(
46740            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
46741        );
46742        assert_eq_m512(r, e);
46743    }
46744
46745    #[simd_test(enable = "avx512f,avx512vl")]
46746    const fn test_mm256_mask_fmadd_ps() {
46747        let a = _mm256_set1_ps(1.);
46748        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46749        let c = _mm256_set1_ps(1.);
46750        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
46751        assert_eq_m256(r, a);
46752        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
46753        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46754        assert_eq_m256(r, e);
46755    }
46756
46757    #[simd_test(enable = "avx512f,avx512vl")]
46758    const fn test_mm256_maskz_fmadd_ps() {
46759        let a = _mm256_set1_ps(1.);
46760        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46761        let c = _mm256_set1_ps(1.);
46762        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
46763        assert_eq_m256(r, _mm256_setzero_ps());
46764        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
46765        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46766        assert_eq_m256(r, e);
46767    }
46768
46769    #[simd_test(enable = "avx512f,avx512vl")]
46770    const fn test_mm256_mask3_fmadd_ps() {
46771        let a = _mm256_set1_ps(1.);
46772        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46773        let c = _mm256_set1_ps(1.);
46774        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
46775        assert_eq_m256(r, c);
46776        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
46777        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46778        assert_eq_m256(r, e);
46779    }
46780
46781    #[simd_test(enable = "avx512f,avx512vl")]
46782    const fn test_mm_mask_fmadd_ps() {
46783        let a = _mm_set1_ps(1.);
46784        let b = _mm_set_ps(0., 1., 2., 3.);
46785        let c = _mm_set1_ps(1.);
46786        let r = _mm_mask_fmadd_ps(a, 0, b, c);
46787        assert_eq_m128(r, a);
46788        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
46789        let e = _mm_set_ps(1., 2., 3., 4.);
46790        assert_eq_m128(r, e);
46791    }
46792
46793    #[simd_test(enable = "avx512f,avx512vl")]
46794    const fn test_mm_maskz_fmadd_ps() {
46795        let a = _mm_set1_ps(1.);
46796        let b = _mm_set_ps(0., 1., 2., 3.);
46797        let c = _mm_set1_ps(1.);
46798        let r = _mm_maskz_fmadd_ps(0, a, b, c);
46799        assert_eq_m128(r, _mm_setzero_ps());
46800        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
46801        let e = _mm_set_ps(1., 2., 3., 4.);
46802        assert_eq_m128(r, e);
46803    }
46804
46805    #[simd_test(enable = "avx512f,avx512vl")]
46806    const fn test_mm_mask3_fmadd_ps() {
46807        let a = _mm_set1_ps(1.);
46808        let b = _mm_set_ps(0., 1., 2., 3.);
46809        let c = _mm_set1_ps(1.);
46810        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
46811        assert_eq_m128(r, c);
46812        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
46813        let e = _mm_set_ps(1., 2., 3., 4.);
46814        assert_eq_m128(r, e);
46815    }
46816
46817    #[simd_test(enable = "avx512f")]
46818    const fn test_mm512_fmsub_ps() {
46819        let a = _mm512_setr_ps(
46820            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
46821        );
46822        let b = _mm512_setr_ps(
46823            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46824        );
46825        let c = _mm512_setr_ps(
46826            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
46827        );
46828        let r = _mm512_fmsub_ps(a, b, c);
46829        let e = _mm512_setr_ps(
46830            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
46831        );
46832        assert_eq_m512(r, e);
46833    }
46834
46835    #[simd_test(enable = "avx512f")]
46836    const fn test_mm512_mask_fmsub_ps() {
46837        let a = _mm512_set1_ps(1.);
46838        let b = _mm512_setr_ps(
46839            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46840        );
46841        let c = _mm512_set1_ps(1.);
46842        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
46843        assert_eq_m512(r, a);
46844        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
46845        let e = _mm512_setr_ps(
46846            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
46847        );
46848        assert_eq_m512(r, e);
46849    }
46850
46851    #[simd_test(enable = "avx512f")]
46852    const fn test_mm512_maskz_fmsub_ps() {
46853        let a = _mm512_set1_ps(1.);
46854        let b = _mm512_setr_ps(
46855            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46856        );
46857        let c = _mm512_set1_ps(1.);
46858        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
46859        assert_eq_m512(r, _mm512_setzero_ps());
46860        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
46861        let e = _mm512_setr_ps(
46862            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
46863        );
46864        assert_eq_m512(r, e);
46865    }
46866
46867    #[simd_test(enable = "avx512f")]
46868    const fn test_mm512_mask3_fmsub_ps() {
46869        let a = _mm512_set1_ps(1.);
46870        let b = _mm512_setr_ps(
46871            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46872        );
46873        let c = _mm512_setr_ps(
46874            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
46875        );
46876        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
46877        assert_eq_m512(r, c);
46878        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
46879        let e = _mm512_setr_ps(
46880            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
46881        );
46882        assert_eq_m512(r, e);
46883    }
46884
46885    #[simd_test(enable = "avx512f,avx512vl")]
46886    const fn test_mm256_mask_fmsub_ps() {
46887        let a = _mm256_set1_ps(1.);
46888        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46889        let c = _mm256_set1_ps(1.);
46890        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
46891        assert_eq_m256(r, a);
46892        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
46893        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
46894        assert_eq_m256(r, e);
46895    }
46896
46897    #[simd_test(enable = "avx512f,avx512vl")]
46898    const fn test_mm256_maskz_fmsub_ps() {
46899        let a = _mm256_set1_ps(1.);
46900        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46901        let c = _mm256_set1_ps(1.);
46902        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
46903        assert_eq_m256(r, _mm256_setzero_ps());
46904        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
46905        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
46906        assert_eq_m256(r, e);
46907    }
46908
46909    #[simd_test(enable = "avx512f,avx512vl")]
46910    const fn test_mm256_mask3_fmsub_ps() {
46911        let a = _mm256_set1_ps(1.);
46912        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
46913        let c = _mm256_set1_ps(1.);
46914        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
46915        assert_eq_m256(r, c);
46916        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
46917        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
46918        assert_eq_m256(r, e);
46919    }
46920
46921    #[simd_test(enable = "avx512f,avx512vl")]
46922    const fn test_mm_mask_fmsub_ps() {
46923        let a = _mm_set1_ps(1.);
46924        let b = _mm_set_ps(0., 1., 2., 3.);
46925        let c = _mm_set1_ps(1.);
46926        let r = _mm_mask_fmsub_ps(a, 0, b, c);
46927        assert_eq_m128(r, a);
46928        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
46929        let e = _mm_set_ps(-1., 0., 1., 2.);
46930        assert_eq_m128(r, e);
46931    }
46932
46933    #[simd_test(enable = "avx512f,avx512vl")]
46934    const fn test_mm_maskz_fmsub_ps() {
46935        let a = _mm_set1_ps(1.);
46936        let b = _mm_set_ps(0., 1., 2., 3.);
46937        let c = _mm_set1_ps(1.);
46938        let r = _mm_maskz_fmsub_ps(0, a, b, c);
46939        assert_eq_m128(r, _mm_setzero_ps());
46940        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
46941        let e = _mm_set_ps(-1., 0., 1., 2.);
46942        assert_eq_m128(r, e);
46943    }
46944
46945    #[simd_test(enable = "avx512f,avx512vl")]
46946    const fn test_mm_mask3_fmsub_ps() {
46947        let a = _mm_set1_ps(1.);
46948        let b = _mm_set_ps(0., 1., 2., 3.);
46949        let c = _mm_set1_ps(1.);
46950        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
46951        assert_eq_m128(r, c);
46952        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
46953        let e = _mm_set_ps(-1., 0., 1., 2.);
46954        assert_eq_m128(r, e);
46955    }
46956
46957    #[simd_test(enable = "avx512f")]
46958    const fn test_mm512_fmaddsub_ps() {
46959        let a = _mm512_set1_ps(1.);
46960        let b = _mm512_setr_ps(
46961            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46962        );
46963        let c = _mm512_set1_ps(1.);
46964        let r = _mm512_fmaddsub_ps(a, b, c);
46965        let e = _mm512_setr_ps(
46966            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
46967        );
46968        assert_eq_m512(r, e);
46969    }
46970
46971    #[simd_test(enable = "avx512f")]
46972    const fn test_mm512_mask_fmaddsub_ps() {
46973        let a = _mm512_set1_ps(1.);
46974        let b = _mm512_setr_ps(
46975            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46976        );
46977        let c = _mm512_set1_ps(1.);
46978        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
46979        assert_eq_m512(r, a);
46980        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
46981        let e = _mm512_setr_ps(
46982            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46983        );
46984        assert_eq_m512(r, e);
46985    }
46986
46987    #[simd_test(enable = "avx512f")]
46988    const fn test_mm512_maskz_fmaddsub_ps() {
46989        let a = _mm512_set1_ps(1.);
46990        let b = _mm512_setr_ps(
46991            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46992        );
46993        let c = _mm512_set1_ps(1.);
46994        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
46995        assert_eq_m512(r, _mm512_setzero_ps());
46996        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
46997        let e = _mm512_setr_ps(
46998            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46999        );
47000        assert_eq_m512(r, e);
47001    }
47002
47003    #[simd_test(enable = "avx512f")]
47004    const fn test_mm512_mask3_fmaddsub_ps() {
47005        let a = _mm512_set1_ps(1.);
47006        let b = _mm512_setr_ps(
47007            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47008        );
47009        let c = _mm512_setr_ps(
47010            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
47011        );
47012        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
47013        assert_eq_m512(r, c);
47014        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
47015        let e = _mm512_setr_ps(
47016            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
47017        );
47018        assert_eq_m512(r, e);
47019    }
47020
47021    #[simd_test(enable = "avx512f,avx512vl")]
47022    const fn test_mm256_mask_fmaddsub_ps() {
47023        let a = _mm256_set1_ps(1.);
47024        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47025        let c = _mm256_set1_ps(1.);
47026        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
47027        assert_eq_m256(r, a);
47028        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
47029        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
47030        assert_eq_m256(r, e);
47031    }
47032
47033    #[simd_test(enable = "avx512f,avx512vl")]
47034    const fn test_mm256_maskz_fmaddsub_ps() {
47035        let a = _mm256_set1_ps(1.);
47036        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47037        let c = _mm256_set1_ps(1.);
47038        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
47039        assert_eq_m256(r, _mm256_setzero_ps());
47040        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
47041        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
47042        assert_eq_m256(r, e);
47043    }
47044
47045    #[simd_test(enable = "avx512f,avx512vl")]
47046    const fn test_mm256_mask3_fmaddsub_ps() {
47047        let a = _mm256_set1_ps(1.);
47048        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47049        let c = _mm256_set1_ps(1.);
47050        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
47051        assert_eq_m256(r, c);
47052        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
47053        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
47054        assert_eq_m256(r, e);
47055    }
47056
47057    #[simd_test(enable = "avx512f,avx512vl")]
47058    const fn test_mm_mask_fmaddsub_ps() {
47059        let a = _mm_set1_ps(1.);
47060        let b = _mm_set_ps(0., 1., 2., 3.);
47061        let c = _mm_set1_ps(1.);
47062        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
47063        assert_eq_m128(r, a);
47064        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
47065        let e = _mm_set_ps(1., 0., 3., 2.);
47066        assert_eq_m128(r, e);
47067    }
47068
47069    #[simd_test(enable = "avx512f,avx512vl")]
47070    const fn test_mm_maskz_fmaddsub_ps() {
47071        let a = _mm_set1_ps(1.);
47072        let b = _mm_set_ps(0., 1., 2., 3.);
47073        let c = _mm_set1_ps(1.);
47074        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
47075        assert_eq_m128(r, _mm_setzero_ps());
47076        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
47077        let e = _mm_set_ps(1., 0., 3., 2.);
47078        assert_eq_m128(r, e);
47079    }
47080
47081    #[simd_test(enable = "avx512f,avx512vl")]
47082    const fn test_mm_mask3_fmaddsub_ps() {
47083        let a = _mm_set1_ps(1.);
47084        let b = _mm_set_ps(0., 1., 2., 3.);
47085        let c = _mm_set1_ps(1.);
47086        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
47087        assert_eq_m128(r, c);
47088        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
47089        let e = _mm_set_ps(1., 0., 3., 2.);
47090        assert_eq_m128(r, e);
47091    }
47092
47093    #[simd_test(enable = "avx512f")]
47094    const fn test_mm512_fmsubadd_ps() {
47095        let a = _mm512_setr_ps(
47096            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
47097        );
47098        let b = _mm512_setr_ps(
47099            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47100        );
47101        let c = _mm512_setr_ps(
47102            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
47103        );
47104        let r = _mm512_fmsubadd_ps(a, b, c);
47105        let e = _mm512_setr_ps(
47106            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
47107        );
47108        assert_eq_m512(r, e);
47109    }
47110
47111    #[simd_test(enable = "avx512f")]
47112    const fn test_mm512_mask_fmsubadd_ps() {
47113        let a = _mm512_set1_ps(1.);
47114        let b = _mm512_setr_ps(
47115            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47116        );
47117        let c = _mm512_set1_ps(1.);
47118        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
47119        assert_eq_m512(r, a);
47120        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
47121        let e = _mm512_setr_ps(
47122            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
47123        );
47124        assert_eq_m512(r, e);
47125    }
47126
47127    #[simd_test(enable = "avx512f")]
47128    const fn test_mm512_maskz_fmsubadd_ps() {
47129        let a = _mm512_set1_ps(1.);
47130        let b = _mm512_setr_ps(
47131            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47132        );
47133        let c = _mm512_set1_ps(1.);
47134        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
47135        assert_eq_m512(r, _mm512_setzero_ps());
47136        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
47137        let e = _mm512_setr_ps(
47138            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
47139        );
47140        assert_eq_m512(r, e);
47141    }
47142
47143    #[simd_test(enable = "avx512f")]
47144    const fn test_mm512_mask3_fmsubadd_ps() {
47145        let a = _mm512_set1_ps(1.);
47146        let b = _mm512_setr_ps(
47147            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47148        );
47149        let c = _mm512_setr_ps(
47150            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
47151        );
47152        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
47153        assert_eq_m512(r, c);
47154        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
47155        let e = _mm512_setr_ps(
47156            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
47157        );
47158        assert_eq_m512(r, e);
47159    }
47160
47161    #[simd_test(enable = "avx512f,avx512vl")]
47162    const fn test_mm256_mask_fmsubadd_ps() {
47163        let a = _mm256_set1_ps(1.);
47164        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47165        let c = _mm256_set1_ps(1.);
47166        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
47167        assert_eq_m256(r, a);
47168        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
47169        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
47170        assert_eq_m256(r, e);
47171    }
47172
47173    #[simd_test(enable = "avx512f,avx512vl")]
47174    const fn test_mm256_maskz_fmsubadd_ps() {
47175        let a = _mm256_set1_ps(1.);
47176        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47177        let c = _mm256_set1_ps(1.);
47178        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
47179        assert_eq_m256(r, _mm256_setzero_ps());
47180        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
47181        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
47182        assert_eq_m256(r, e);
47183    }
47184
47185    #[simd_test(enable = "avx512f,avx512vl")]
47186    const fn test_mm256_mask3_fmsubadd_ps() {
47187        let a = _mm256_set1_ps(1.);
47188        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47189        let c = _mm256_set1_ps(1.);
47190        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
47191        assert_eq_m256(r, c);
47192        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
47193        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
47194        assert_eq_m256(r, e);
47195    }
47196
47197    #[simd_test(enable = "avx512f,avx512vl")]
47198    const fn test_mm_mask_fmsubadd_ps() {
47199        let a = _mm_set1_ps(1.);
47200        let b = _mm_set_ps(0., 1., 2., 3.);
47201        let c = _mm_set1_ps(1.);
47202        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
47203        assert_eq_m128(r, a);
47204        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
47205        let e = _mm_set_ps(-1., 2., 1., 4.);
47206        assert_eq_m128(r, e);
47207    }
47208
47209    #[simd_test(enable = "avx512f,avx512vl")]
47210    const fn test_mm_maskz_fmsubadd_ps() {
47211        let a = _mm_set1_ps(1.);
47212        let b = _mm_set_ps(0., 1., 2., 3.);
47213        let c = _mm_set1_ps(1.);
47214        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
47215        assert_eq_m128(r, _mm_setzero_ps());
47216        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
47217        let e = _mm_set_ps(-1., 2., 1., 4.);
47218        assert_eq_m128(r, e);
47219    }
47220
47221    #[simd_test(enable = "avx512f,avx512vl")]
47222    const fn test_mm_mask3_fmsubadd_ps() {
47223        let a = _mm_set1_ps(1.);
47224        let b = _mm_set_ps(0., 1., 2., 3.);
47225        let c = _mm_set1_ps(1.);
47226        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
47227        assert_eq_m128(r, c);
47228        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
47229        let e = _mm_set_ps(-1., 2., 1., 4.);
47230        assert_eq_m128(r, e);
47231    }
47232
47233    #[simd_test(enable = "avx512f")]
47234    const fn test_mm512_fnmadd_ps() {
47235        let a = _mm512_set1_ps(1.);
47236        let b = _mm512_setr_ps(
47237            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47238        );
47239        let c = _mm512_set1_ps(1.);
47240        let r = _mm512_fnmadd_ps(a, b, c);
47241        let e = _mm512_setr_ps(
47242            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
47243        );
47244        assert_eq_m512(r, e);
47245    }
47246
47247    #[simd_test(enable = "avx512f")]
47248    const fn test_mm512_mask_fnmadd_ps() {
47249        let a = _mm512_set1_ps(1.);
47250        let b = _mm512_setr_ps(
47251            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47252        );
47253        let c = _mm512_set1_ps(1.);
47254        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
47255        assert_eq_m512(r, a);
47256        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
47257        let e = _mm512_setr_ps(
47258            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
47259        );
47260        assert_eq_m512(r, e);
47261    }
47262
47263    #[simd_test(enable = "avx512f")]
47264    const fn test_mm512_maskz_fnmadd_ps() {
47265        let a = _mm512_set1_ps(1.);
47266        let b = _mm512_setr_ps(
47267            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47268        );
47269        let c = _mm512_set1_ps(1.);
47270        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
47271        assert_eq_m512(r, _mm512_setzero_ps());
47272        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
47273        let e = _mm512_setr_ps(
47274            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
47275        );
47276        assert_eq_m512(r, e);
47277    }
47278
47279    #[simd_test(enable = "avx512f")]
47280    const fn test_mm512_mask3_fnmadd_ps() {
47281        let a = _mm512_set1_ps(1.);
47282        let b = _mm512_setr_ps(
47283            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47284        );
47285        let c = _mm512_setr_ps(
47286            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
47287        );
47288        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
47289        assert_eq_m512(r, c);
47290        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
47291        let e = _mm512_setr_ps(
47292            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
47293        );
47294        assert_eq_m512(r, e);
47295    }
47296
47297    #[simd_test(enable = "avx512f,avx512vl")]
47298    const fn test_mm256_mask_fnmadd_ps() {
47299        let a = _mm256_set1_ps(1.);
47300        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47301        let c = _mm256_set1_ps(1.);
47302        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
47303        assert_eq_m256(r, a);
47304        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
47305        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
47306        assert_eq_m256(r, e);
47307    }
47308
47309    #[simd_test(enable = "avx512f,avx512vl")]
47310    const fn test_mm256_maskz_fnmadd_ps() {
47311        let a = _mm256_set1_ps(1.);
47312        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47313        let c = _mm256_set1_ps(1.);
47314        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
47315        assert_eq_m256(r, _mm256_setzero_ps());
47316        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
47317        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
47318        assert_eq_m256(r, e);
47319    }
47320
47321    #[simd_test(enable = "avx512f,avx512vl")]
47322    const fn test_mm256_mask3_fnmadd_ps() {
47323        let a = _mm256_set1_ps(1.);
47324        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47325        let c = _mm256_set1_ps(1.);
47326        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
47327        assert_eq_m256(r, c);
47328        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
47329        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
47330        assert_eq_m256(r, e);
47331    }
47332
47333    #[simd_test(enable = "avx512f,avx512vl")]
47334    const fn test_mm_mask_fnmadd_ps() {
47335        let a = _mm_set1_ps(1.);
47336        let b = _mm_set_ps(0., 1., 2., 3.);
47337        let c = _mm_set1_ps(1.);
47338        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
47339        assert_eq_m128(r, a);
47340        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
47341        let e = _mm_set_ps(1., 0., -1., -2.);
47342        assert_eq_m128(r, e);
47343    }
47344
47345    #[simd_test(enable = "avx512f,avx512vl")]
47346    const fn test_mm_maskz_fnmadd_ps() {
47347        let a = _mm_set1_ps(1.);
47348        let b = _mm_set_ps(0., 1., 2., 3.);
47349        let c = _mm_set1_ps(1.);
47350        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
47351        assert_eq_m128(r, _mm_setzero_ps());
47352        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
47353        let e = _mm_set_ps(1., 0., -1., -2.);
47354        assert_eq_m128(r, e);
47355    }
47356
47357    #[simd_test(enable = "avx512f,avx512vl")]
47358    const fn test_mm_mask3_fnmadd_ps() {
47359        let a = _mm_set1_ps(1.);
47360        let b = _mm_set_ps(0., 1., 2., 3.);
47361        let c = _mm_set1_ps(1.);
47362        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
47363        assert_eq_m128(r, c);
47364        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
47365        let e = _mm_set_ps(1., 0., -1., -2.);
47366        assert_eq_m128(r, e);
47367    }
47368
47369    #[simd_test(enable = "avx512f")]
47370    const fn test_mm512_fnmsub_ps() {
47371        let a = _mm512_set1_ps(1.);
47372        let b = _mm512_setr_ps(
47373            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47374        );
47375        let c = _mm512_set1_ps(1.);
47376        let r = _mm512_fnmsub_ps(a, b, c);
47377        let e = _mm512_setr_ps(
47378            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
47379        );
47380        assert_eq_m512(r, e);
47381    }
47382
47383    #[simd_test(enable = "avx512f")]
47384    const fn test_mm512_mask_fnmsub_ps() {
47385        let a = _mm512_set1_ps(1.);
47386        let b = _mm512_setr_ps(
47387            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47388        );
47389        let c = _mm512_set1_ps(1.);
47390        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
47391        assert_eq_m512(r, a);
47392        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
47393        let e = _mm512_setr_ps(
47394            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
47395        );
47396        assert_eq_m512(r, e);
47397    }
47398
47399    #[simd_test(enable = "avx512f")]
47400    const fn test_mm512_maskz_fnmsub_ps() {
47401        let a = _mm512_set1_ps(1.);
47402        let b = _mm512_setr_ps(
47403            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47404        );
47405        let c = _mm512_set1_ps(1.);
47406        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
47407        assert_eq_m512(r, _mm512_setzero_ps());
47408        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
47409        let e = _mm512_setr_ps(
47410            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47411        );
47412        assert_eq_m512(r, e);
47413    }
47414
47415    #[simd_test(enable = "avx512f")]
47416    const fn test_mm512_mask3_fnmsub_ps() {
47417        let a = _mm512_set1_ps(1.);
47418        let b = _mm512_setr_ps(
47419            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47420        );
47421        let c = _mm512_setr_ps(
47422            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
47423        );
47424        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
47425        assert_eq_m512(r, c);
47426        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
47427        let e = _mm512_setr_ps(
47428            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
47429        );
47430        assert_eq_m512(r, e);
47431    }
47432
47433    #[simd_test(enable = "avx512f,avx512vl")]
47434    const fn test_mm256_mask_fnmsub_ps() {
47435        let a = _mm256_set1_ps(1.);
47436        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47437        let c = _mm256_set1_ps(1.);
47438        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
47439        assert_eq_m256(r, a);
47440        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
47441        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
47442        assert_eq_m256(r, e);
47443    }
47444
47445    #[simd_test(enable = "avx512f,avx512vl")]
47446    const fn test_mm256_maskz_fnmsub_ps() {
47447        let a = _mm256_set1_ps(1.);
47448        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47449        let c = _mm256_set1_ps(1.);
47450        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
47451        assert_eq_m256(r, _mm256_setzero_ps());
47452        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
47453        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
47454        assert_eq_m256(r, e);
47455    }
47456
47457    #[simd_test(enable = "avx512f,avx512vl")]
47458    const fn test_mm256_mask3_fnmsub_ps() {
47459        let a = _mm256_set1_ps(1.);
47460        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
47461        let c = _mm256_set1_ps(1.);
47462        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
47463        assert_eq_m256(r, c);
47464        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
47465        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
47466        assert_eq_m256(r, e);
47467    }
47468
47469    #[simd_test(enable = "avx512f,avx512vl")]
47470    const fn test_mm_mask_fnmsub_ps() {
47471        let a = _mm_set1_ps(1.);
47472        let b = _mm_set_ps(0., 1., 2., 3.);
47473        let c = _mm_set1_ps(1.);
47474        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
47475        assert_eq_m128(r, a);
47476        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
47477        let e = _mm_set_ps(-1., -2., -3., -4.);
47478        assert_eq_m128(r, e);
47479    }
47480
47481    #[simd_test(enable = "avx512f,avx512vl")]
47482    const fn test_mm_maskz_fnmsub_ps() {
47483        let a = _mm_set1_ps(1.);
47484        let b = _mm_set_ps(0., 1., 2., 3.);
47485        let c = _mm_set1_ps(1.);
47486        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
47487        assert_eq_m128(r, _mm_setzero_ps());
47488        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
47489        let e = _mm_set_ps(-1., -2., -3., -4.);
47490        assert_eq_m128(r, e);
47491    }
47492
47493    #[simd_test(enable = "avx512f,avx512vl")]
47494    const fn test_mm_mask3_fnmsub_ps() {
47495        let a = _mm_set1_ps(1.);
47496        let b = _mm_set_ps(0., 1., 2., 3.);
47497        let c = _mm_set1_ps(1.);
47498        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
47499        assert_eq_m128(r, c);
47500        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
47501        let e = _mm_set_ps(-1., -2., -3., -4.);
47502        assert_eq_m128(r, e);
47503    }
47504
47505    #[simd_test(enable = "avx512f")]
47506    fn test_mm512_rcp14_ps() {
47507        let a = _mm512_set1_ps(3.);
47508        let r = _mm512_rcp14_ps(a);
47509        let e = _mm512_set1_ps(0.33333206);
47510        assert_eq_m512(r, e);
47511    }
47512
47513    #[simd_test(enable = "avx512f")]
47514    fn test_mm512_mask_rcp14_ps() {
47515        let a = _mm512_set1_ps(3.);
47516        let r = _mm512_mask_rcp14_ps(a, 0, a);
47517        assert_eq_m512(r, a);
47518        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
47519        let e = _mm512_setr_ps(
47520            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
47521            0.33333206, 0.33333206, 0.33333206, 0.33333206,
47522        );
47523        assert_eq_m512(r, e);
47524    }
47525
47526    #[simd_test(enable = "avx512f")]
47527    fn test_mm512_maskz_rcp14_ps() {
47528        let a = _mm512_set1_ps(3.);
47529        let r = _mm512_maskz_rcp14_ps(0, a);
47530        assert_eq_m512(r, _mm512_setzero_ps());
47531        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
47532        let e = _mm512_setr_ps(
47533            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
47534            0.33333206, 0.33333206, 0.33333206, 0.33333206,
47535        );
47536        assert_eq_m512(r, e);
47537    }
47538
47539    #[simd_test(enable = "avx512f,avx512vl")]
47540    fn test_mm256_rcp14_ps() {
47541        let a = _mm256_set1_ps(3.);
47542        let r = _mm256_rcp14_ps(a);
47543        let e = _mm256_set1_ps(0.33333206);
47544        assert_eq_m256(r, e);
47545    }
47546
47547    #[simd_test(enable = "avx512f,avx512vl")]
47548    fn test_mm256_mask_rcp14_ps() {
47549        let a = _mm256_set1_ps(3.);
47550        let r = _mm256_mask_rcp14_ps(a, 0, a);
47551        assert_eq_m256(r, a);
47552        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
47553        let e = _mm256_set1_ps(0.33333206);
47554        assert_eq_m256(r, e);
47555    }
47556
47557    #[simd_test(enable = "avx512f,avx512vl")]
47558    fn test_mm256_maskz_rcp14_ps() {
47559        let a = _mm256_set1_ps(3.);
47560        let r = _mm256_maskz_rcp14_ps(0, a);
47561        assert_eq_m256(r, _mm256_setzero_ps());
47562        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
47563        let e = _mm256_set1_ps(0.33333206);
47564        assert_eq_m256(r, e);
47565    }
47566
47567    #[simd_test(enable = "avx512f,avx512vl")]
47568    fn test_mm_rcp14_ps() {
47569        let a = _mm_set1_ps(3.);
47570        let r = _mm_rcp14_ps(a);
47571        let e = _mm_set1_ps(0.33333206);
47572        assert_eq_m128(r, e);
47573    }
47574
47575    #[simd_test(enable = "avx512f,avx512vl")]
47576    fn test_mm_mask_rcp14_ps() {
47577        let a = _mm_set1_ps(3.);
47578        let r = _mm_mask_rcp14_ps(a, 0, a);
47579        assert_eq_m128(r, a);
47580        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
47581        let e = _mm_set1_ps(0.33333206);
47582        assert_eq_m128(r, e);
47583    }
47584
47585    #[simd_test(enable = "avx512f,avx512vl")]
47586    fn test_mm_maskz_rcp14_ps() {
47587        let a = _mm_set1_ps(3.);
47588        let r = _mm_maskz_rcp14_ps(0, a);
47589        assert_eq_m128(r, _mm_setzero_ps());
47590        let r = _mm_maskz_rcp14_ps(0b00001111, a);
47591        let e = _mm_set1_ps(0.33333206);
47592        assert_eq_m128(r, e);
47593    }
47594
47595    #[simd_test(enable = "avx512f")]
47596    fn test_mm512_rsqrt14_ps() {
47597        let a = _mm512_set1_ps(3.);
47598        let r = _mm512_rsqrt14_ps(a);
47599        let e = _mm512_set1_ps(0.5773392);
47600        assert_eq_m512(r, e);
47601    }
47602
47603    #[simd_test(enable = "avx512f")]
47604    fn test_mm512_mask_rsqrt14_ps() {
47605        let a = _mm512_set1_ps(3.);
47606        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
47607        assert_eq_m512(r, a);
47608        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
47609        let e = _mm512_setr_ps(
47610            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
47611            0.5773392, 0.5773392, 0.5773392,
47612        );
47613        assert_eq_m512(r, e);
47614    }
47615
47616    #[simd_test(enable = "avx512f")]
47617    fn test_mm512_maskz_rsqrt14_ps() {
47618        let a = _mm512_set1_ps(3.);
47619        let r = _mm512_maskz_rsqrt14_ps(0, a);
47620        assert_eq_m512(r, _mm512_setzero_ps());
47621        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
47622        let e = _mm512_setr_ps(
47623            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
47624            0.5773392, 0.5773392, 0.5773392,
47625        );
47626        assert_eq_m512(r, e);
47627    }
47628
47629    #[simd_test(enable = "avx512f,avx512vl")]
47630    fn test_mm256_rsqrt14_ps() {
47631        let a = _mm256_set1_ps(3.);
47632        let r = _mm256_rsqrt14_ps(a);
47633        let e = _mm256_set1_ps(0.5773392);
47634        assert_eq_m256(r, e);
47635    }
47636
47637    #[simd_test(enable = "avx512f,avx512vl")]
47638    fn test_mm256_mask_rsqrt14_ps() {
47639        let a = _mm256_set1_ps(3.);
47640        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
47641        assert_eq_m256(r, a);
47642        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
47643        let e = _mm256_set1_ps(0.5773392);
47644        assert_eq_m256(r, e);
47645    }
47646
47647    #[simd_test(enable = "avx512f,avx512vl")]
47648    fn test_mm256_maskz_rsqrt14_ps() {
47649        let a = _mm256_set1_ps(3.);
47650        let r = _mm256_maskz_rsqrt14_ps(0, a);
47651        assert_eq_m256(r, _mm256_setzero_ps());
47652        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
47653        let e = _mm256_set1_ps(0.5773392);
47654        assert_eq_m256(r, e);
47655    }
47656
47657    #[simd_test(enable = "avx512f,avx512vl")]
47658    fn test_mm_rsqrt14_ps() {
47659        let a = _mm_set1_ps(3.);
47660        let r = _mm_rsqrt14_ps(a);
47661        let e = _mm_set1_ps(0.5773392);
47662        assert_eq_m128(r, e);
47663    }
47664
47665    #[simd_test(enable = "avx512f,avx512vl")]
47666    fn test_mm_mask_rsqrt14_ps() {
47667        let a = _mm_set1_ps(3.);
47668        let r = _mm_mask_rsqrt14_ps(a, 0, a);
47669        assert_eq_m128(r, a);
47670        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
47671        let e = _mm_set1_ps(0.5773392);
47672        assert_eq_m128(r, e);
47673    }
47674
47675    #[simd_test(enable = "avx512f,avx512vl")]
47676    fn test_mm_maskz_rsqrt14_ps() {
47677        let a = _mm_set1_ps(3.);
47678        let r = _mm_maskz_rsqrt14_ps(0, a);
47679        assert_eq_m128(r, _mm_setzero_ps());
47680        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
47681        let e = _mm_set1_ps(0.5773392);
47682        assert_eq_m128(r, e);
47683    }
47684
47685    #[simd_test(enable = "avx512f")]
47686    fn test_mm512_getexp_ps() {
47687        let a = _mm512_set1_ps(3.);
47688        let r = _mm512_getexp_ps(a);
47689        let e = _mm512_set1_ps(1.);
47690        assert_eq_m512(r, e);
47691    }
47692
47693    #[simd_test(enable = "avx512f")]
47694    fn test_mm512_mask_getexp_ps() {
47695        let a = _mm512_set1_ps(3.);
47696        let r = _mm512_mask_getexp_ps(a, 0, a);
47697        assert_eq_m512(r, a);
47698        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
47699        let e = _mm512_setr_ps(
47700            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47701        );
47702        assert_eq_m512(r, e);
47703    }
47704
47705    #[simd_test(enable = "avx512f")]
47706    fn test_mm512_maskz_getexp_ps() {
47707        let a = _mm512_set1_ps(3.);
47708        let r = _mm512_maskz_getexp_ps(0, a);
47709        assert_eq_m512(r, _mm512_setzero_ps());
47710        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
47711        let e = _mm512_setr_ps(
47712            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47713        );
47714        assert_eq_m512(r, e);
47715    }
47716
47717    #[simd_test(enable = "avx512f,avx512vl")]
47718    fn test_mm256_getexp_ps() {
47719        let a = _mm256_set1_ps(3.);
47720        let r = _mm256_getexp_ps(a);
47721        let e = _mm256_set1_ps(1.);
47722        assert_eq_m256(r, e);
47723    }
47724
47725    #[simd_test(enable = "avx512f,avx512vl")]
47726    fn test_mm256_mask_getexp_ps() {
47727        let a = _mm256_set1_ps(3.);
47728        let r = _mm256_mask_getexp_ps(a, 0, a);
47729        assert_eq_m256(r, a);
47730        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
47731        let e = _mm256_set1_ps(1.);
47732        assert_eq_m256(r, e);
47733    }
47734
47735    #[simd_test(enable = "avx512f,avx512vl")]
47736    fn test_mm256_maskz_getexp_ps() {
47737        let a = _mm256_set1_ps(3.);
47738        let r = _mm256_maskz_getexp_ps(0, a);
47739        assert_eq_m256(r, _mm256_setzero_ps());
47740        let r = _mm256_maskz_getexp_ps(0b11111111, a);
47741        let e = _mm256_set1_ps(1.);
47742        assert_eq_m256(r, e);
47743    }
47744
47745    #[simd_test(enable = "avx512f,avx512vl")]
47746    fn test_mm_getexp_ps() {
47747        let a = _mm_set1_ps(3.);
47748        let r = _mm_getexp_ps(a);
47749        let e = _mm_set1_ps(1.);
47750        assert_eq_m128(r, e);
47751    }
47752
47753    #[simd_test(enable = "avx512f,avx512vl")]
47754    fn test_mm_mask_getexp_ps() {
47755        let a = _mm_set1_ps(3.);
47756        let r = _mm_mask_getexp_ps(a, 0, a);
47757        assert_eq_m128(r, a);
47758        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
47759        let e = _mm_set1_ps(1.);
47760        assert_eq_m128(r, e);
47761    }
47762
47763    #[simd_test(enable = "avx512f,avx512vl")]
47764    fn test_mm_maskz_getexp_ps() {
47765        let a = _mm_set1_ps(3.);
47766        let r = _mm_maskz_getexp_ps(0, a);
47767        assert_eq_m128(r, _mm_setzero_ps());
47768        let r = _mm_maskz_getexp_ps(0b00001111, a);
47769        let e = _mm_set1_ps(1.);
47770        assert_eq_m128(r, e);
47771    }
47772
47773    #[simd_test(enable = "avx512f")]
47774    fn test_mm512_roundscale_ps() {
47775        let a = _mm512_set1_ps(1.1);
47776        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
47777        let e = _mm512_set1_ps(1.0);
47778        assert_eq_m512(r, e);
47779    }
47780
47781    #[simd_test(enable = "avx512f")]
47782    fn test_mm512_mask_roundscale_ps() {
47783        let a = _mm512_set1_ps(1.1);
47784        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47785        let e = _mm512_set1_ps(1.1);
47786        assert_eq_m512(r, e);
47787        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
47788        let e = _mm512_set1_ps(1.0);
47789        assert_eq_m512(r, e);
47790    }
47791
47792    #[simd_test(enable = "avx512f")]
47793    fn test_mm512_maskz_roundscale_ps() {
47794        let a = _mm512_set1_ps(1.1);
47795        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47796        assert_eq_m512(r, _mm512_setzero_ps());
47797        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
47798        let e = _mm512_set1_ps(1.0);
47799        assert_eq_m512(r, e);
47800    }
47801
47802    #[simd_test(enable = "avx512f,avx512vl")]
47803    fn test_mm256_roundscale_ps() {
47804        let a = _mm256_set1_ps(1.1);
47805        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
47806        let e = _mm256_set1_ps(1.0);
47807        assert_eq_m256(r, e);
47808    }
47809
47810    #[simd_test(enable = "avx512f,avx512vl")]
47811    fn test_mm256_mask_roundscale_ps() {
47812        let a = _mm256_set1_ps(1.1);
47813        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47814        let e = _mm256_set1_ps(1.1);
47815        assert_eq_m256(r, e);
47816        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
47817        let e = _mm256_set1_ps(1.0);
47818        assert_eq_m256(r, e);
47819    }
47820
47821    #[simd_test(enable = "avx512f,avx512vl")]
47822    fn test_mm256_maskz_roundscale_ps() {
47823        let a = _mm256_set1_ps(1.1);
47824        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47825        assert_eq_m256(r, _mm256_setzero_ps());
47826        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
47827        let e = _mm256_set1_ps(1.0);
47828        assert_eq_m256(r, e);
47829    }
47830
47831    #[simd_test(enable = "avx512f,avx512vl")]
47832    fn test_mm_roundscale_ps() {
47833        let a = _mm_set1_ps(1.1);
47834        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
47835        let e = _mm_set1_ps(1.0);
47836        assert_eq_m128(r, e);
47837    }
47838
47839    #[simd_test(enable = "avx512f,avx512vl")]
47840    fn test_mm_mask_roundscale_ps() {
47841        let a = _mm_set1_ps(1.1);
47842        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47843        let e = _mm_set1_ps(1.1);
47844        assert_eq_m128(r, e);
47845        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
47846        let e = _mm_set1_ps(1.0);
47847        assert_eq_m128(r, e);
47848    }
47849
47850    #[simd_test(enable = "avx512f,avx512vl")]
47851    fn test_mm_maskz_roundscale_ps() {
47852        let a = _mm_set1_ps(1.1);
47853        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47854        assert_eq_m128(r, _mm_setzero_ps());
47855        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
47856        let e = _mm_set1_ps(1.0);
47857        assert_eq_m128(r, e);
47858    }
47859
    // All scalef tests below use a = 1.0, b = 3.0; the asserted result is
    // 1.0 * 2^3 = 8.0 (`vscalefps` scales `a` by two to the power of `b`).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_ps(a, b);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        // A writemask of 0 leaves every lane equal to src (here `a`).
        let r = _mm512_mask_scalef_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        // Upper eight mask bits select lanes 8..=15 for the scaled value;
        // the lower eight keep src. `_mm512_set_ps` lists arguments from the
        // highest lane down to lane 0.
        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        // A zeromask of 0 zeroes every lane.
        let r = _mm512_maskz_scalef_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Selected lanes get the scaled value, unselected lanes become 0.
        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_scalef_ps(a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        // Mask 0: result is src; full mask: all eight lanes scaled.
        let r = _mm256_mask_scalef_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        // Mask 0: all lanes zeroed; full mask: all eight lanes scaled.
        let r = _mm256_maskz_scalef_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_ps(a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        // Mask 0: result is src; full mask: all four lanes scaled.
        let r = _mm_mask_scalef_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        // Mask 0: all lanes zeroed; full mask: all four lanes scaled.
        let r = _mm_maskz_scalef_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }
47956
47957    #[simd_test(enable = "avx512f")]
47958    fn test_mm512_fixupimm_ps() {
47959        let a = _mm512_set1_ps(f32::NAN);
47960        let b = _mm512_set1_ps(f32::MAX);
47961        let c = _mm512_set1_epi32(i32::MAX);
47962        //let r = _mm512_fixupimm_ps(a, b, c, 5);
47963        let r = _mm512_fixupimm_ps::<5>(a, b, c);
47964        let e = _mm512_set1_ps(0.0);
47965        assert_eq_m512(r, e);
47966    }
47967
47968    #[simd_test(enable = "avx512f")]
47969    fn test_mm512_mask_fixupimm_ps() {
47970        #[rustfmt::skip]
47971        let a = _mm512_set_ps(
47972            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47973            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47974            1., 1., 1., 1.,
47975            1., 1., 1., 1.,
47976        );
47977        let b = _mm512_set1_ps(f32::MAX);
47978        let c = _mm512_set1_epi32(i32::MAX);
47979        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
47980        let e = _mm512_set_ps(
47981            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47982        );
47983        assert_eq_m512(r, e);
47984    }
47985
47986    #[simd_test(enable = "avx512f")]
47987    fn test_mm512_maskz_fixupimm_ps() {
47988        #[rustfmt::skip]
47989        let a = _mm512_set_ps(
47990            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47991            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47992            1., 1., 1., 1.,
47993            1., 1., 1., 1.,
47994        );
47995        let b = _mm512_set1_ps(f32::MAX);
47996        let c = _mm512_set1_epi32(i32::MAX);
47997        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
47998        let e = _mm512_set_ps(
47999            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
48000        );
48001        assert_eq_m512(r, e);
48002    }
48003
48004    #[simd_test(enable = "avx512f,avx512vl")]
48005    fn test_mm256_fixupimm_ps() {
48006        let a = _mm256_set1_ps(f32::NAN);
48007        let b = _mm256_set1_ps(f32::MAX);
48008        let c = _mm256_set1_epi32(i32::MAX);
48009        let r = _mm256_fixupimm_ps::<5>(a, b, c);
48010        let e = _mm256_set1_ps(0.0);
48011        assert_eq_m256(r, e);
48012    }
48013
48014    #[simd_test(enable = "avx512f,avx512vl")]
48015    fn test_mm256_mask_fixupimm_ps() {
48016        let a = _mm256_set1_ps(f32::NAN);
48017        let b = _mm256_set1_ps(f32::MAX);
48018        let c = _mm256_set1_epi32(i32::MAX);
48019        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
48020        let e = _mm256_set1_ps(0.0);
48021        assert_eq_m256(r, e);
48022    }
48023
48024    #[simd_test(enable = "avx512f,avx512vl")]
48025    fn test_mm256_maskz_fixupimm_ps() {
48026        let a = _mm256_set1_ps(f32::NAN);
48027        let b = _mm256_set1_ps(f32::MAX);
48028        let c = _mm256_set1_epi32(i32::MAX);
48029        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
48030        let e = _mm256_set1_ps(0.0);
48031        assert_eq_m256(r, e);
48032    }
48033
48034    #[simd_test(enable = "avx512f,avx512vl")]
48035    fn test_mm_fixupimm_ps() {
48036        let a = _mm_set1_ps(f32::NAN);
48037        let b = _mm_set1_ps(f32::MAX);
48038        let c = _mm_set1_epi32(i32::MAX);
48039        let r = _mm_fixupimm_ps::<5>(a, b, c);
48040        let e = _mm_set1_ps(0.0);
48041        assert_eq_m128(r, e);
48042    }
48043
48044    #[simd_test(enable = "avx512f,avx512vl")]
48045    fn test_mm_mask_fixupimm_ps() {
48046        let a = _mm_set1_ps(f32::NAN);
48047        let b = _mm_set1_ps(f32::MAX);
48048        let c = _mm_set1_epi32(i32::MAX);
48049        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
48050        let e = _mm_set1_ps(0.0);
48051        assert_eq_m128(r, e);
48052    }
48053
48054    #[simd_test(enable = "avx512f,avx512vl")]
48055    fn test_mm_maskz_fixupimm_ps() {
48056        let a = _mm_set1_ps(f32::NAN);
48057        let b = _mm_set1_ps(f32::MAX);
48058        let c = _mm_set1_epi32(i32::MAX);
48059        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
48060        let e = _mm_set1_ps(0.0);
48061        assert_eq_m128(r, e);
48062    }
48063
    // `vpternlog`: for every bit position the triple (a_bit, b_bit, c_bit)
    // forms a 3-bit index into the 8-bit truth table given as IMM8.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_ternarylogic_epi32() {
        let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m512i(r, a);

        // Bitwise xor.
        let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m512i(r, e);
        // Cross-check against the composed xor intrinsics.
        assert_eq_m512i(r, _mm512_xor_si512(_mm512_xor_si512(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_ternarylogic_epi32() {
        let src = _mm512_set1_epi32(1 << 2);
        let a = _mm512_set1_epi32(1 << 1);
        let b = _mm512_set1_epi32(1 << 0);
        // Writemask 0: every lane keeps src.
        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m512i(r, src);
        // IMM8 = 8 sets only table entry 0b011; src, a and b have disjoint
        // set bits so that input pattern never occurs and the result is 0.
        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        // With a zeromask of 0 the IMM8 value is irrelevant: all lanes zero.
        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // IMM8 = 8: table entry 0b011 never occurs for these disjoint inputs.
        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_ternarylogic_epi32() {
        // Local shim mirroring `_mm512_set4_epi32` for the 256-bit width.
        let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);

        let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m256i(r, a);

        // Bitwise xor.
        let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m256i(r, e);
        assert_eq_m256i(r, _mm256_xor_si256(_mm256_xor_si256(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_ternarylogic_epi32() {
        let src = _mm256_set1_epi32(1 << 2);
        let a = _mm256_set1_epi32(1 << 1);
        let b = _mm256_set1_epi32(1 << 0);
        // Writemask 0: every lane keeps src.
        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m256i(r, src);
        // Disjoint inputs never hit table entry 0b011 (the only set bit of 8).
        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_ternarylogic_epi32() {
        let a = _mm256_set1_epi32(1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let c = _mm256_set1_epi32(1 << 0);
        // With a zeromask of 0 the IMM8 value is irrelevant: all lanes zero.
        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_ternarylogic_epi32() {
        let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m128i(r, a);

        // Bitwise xor.
        let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m128i(r, e);
        assert_eq_m128i(r, _mm_xor_si128(_mm_xor_si128(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_ternarylogic_epi32() {
        let src = _mm_set1_epi32(1 << 2);
        let a = _mm_set1_epi32(1 << 1);
        let b = _mm_set1_epi32(1 << 0);
        // Writemask 0: every lane keeps src.
        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m128i(r, src);
        // Disjoint inputs never hit table entry 0b011 (the only set bit of 8).
        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_ternarylogic_epi32() {
        let a = _mm_set1_epi32(1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let c = _mm_set1_epi32(1 << 0);
        // With a zeromask of 0 the IMM8 value is irrelevant: all lanes zero.
        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }
48203
    // getmant extracts the normalized mantissa. The input 10.0 = 1.25 * 2^3,
    // and 1.25 falls inside both normalization intervals used below
    // ([0.75, 1.5) and [1, 2)), so every asserted mantissa is 1.25.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        // Writemask 0: every lane keeps src (`a`).
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m512(r, a);
        // Upper eight mask bits: lanes 8..=15 get the mantissa, rest keep src.
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        // Zeromask 0: all lanes zeroed.
        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_getmant_ps() {
        let a = _mm256_set1_ps(10.);
        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
        let e = _mm256_set1_ps(1.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_getmant_ps() {
        let a = _mm256_set1_ps(10.);
        // Mask 0 keeps src; full mask extracts the mantissa in every lane.
        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
        let e = _mm256_set1_ps(1.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_getmant_ps() {
        let a = _mm256_set1_ps(10.);
        // Mask 0 zeroes all lanes; full mask extracts the mantissa everywhere.
        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
        let e = _mm256_set1_ps(1.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_getmant_ps() {
        let a = _mm_set1_ps(10.);
        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
        let e = _mm_set1_ps(1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_getmant_ps() {
        let a = _mm_set1_ps(10.);
        // Mask 0 keeps src; full mask extracts the mantissa in every lane.
        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
        let e = _mm_set1_ps(1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_getmant_ps() {
        let a = _mm_set1_ps(10.);
        // Mask 0 zeroes all lanes; full mask extracts the mantissa everywhere.
        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
        let e = _mm_set1_ps(1.25);
        assert_eq_m128(r, e);
    }
48296
    #[simd_test(enable = "avx512f")]
    fn test_mm512_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        // The last lane distinguishes the rounding modes: 0.00000007 - 1.0 is
        // -0.99999994 rounded to nearest, -0.9999999 truncated toward zero.
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        // Writemask 0: every lane keeps src (`a`).
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m512(r, a);
        // Upper eight mask bits: lanes 8..=15 take the sum, rest keep src.
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        // Zeromask 0: every lane is zeroed.
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48365
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        // The last lane distinguishes the rounding modes: 0.00000007 - 1.0 is
        // -0.99999994 rounded to nearest, -0.9999999 truncated toward zero.
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        // Writemask 0: every lane keeps src (`a`).
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        // Upper eight mask bits: lanes 8..=15 take the difference, rest keep src.
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        // Zeromask 0: every lane is zeroed.
        let r =
            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48437
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        // 0.1 is not exactly representable, so several products differ in the
        // last ulp between round-to-nearest and round-toward-zero.
        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.15, 0.2, 0.35,
            0.4, 0.55, 0.6, 0.75,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.14999999, 0.2, 0.35,
            0.4, 0.54999995, 0.59999996, 0.75,
            0.8, 0.95, 1.0, 1.15,
            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        // Writemask 0: every lane keeps src (`a`).
        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        // Upper eight mask bits: lanes 8..=15 take the product, rest keep src.
        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        // Zeromask 0: every lane is zeroed.
        let r =
            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }
48525
    #[simd_test(enable = "avx512f")]
    fn test_mm512_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        // 1/3: nearest rounding and truncation toward zero differ in the last
        // ulp (0.33333334 vs 0.3333333).
        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(0.33333334);
        assert_eq_m512(r, e);
        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(0.3333333);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        // Writemask 0: every lane keeps src (`a`).
        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        // Upper eight mask bits: lanes 8..=15 take the quotient, rest keep src.
        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        // Zeromask 0: every lane is zeroed.
        let r =
            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }
48577
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        // sqrt(3): nearest rounding gives 1.7320508, rounding toward +inf
        // gives the next representable value up, 1.7320509.
        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ps(1.7320508);
        assert_eq_m512(r, e);
        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ps(1.7320509);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        // Writemask 0: every lane keeps src (`a`).
        let r =
            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
        assert_eq_m512(r, a);
        // Upper eight mask bits: lanes 8..=15 take the root, rest keep src.
        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        // Zeromask 0: every lane is zeroed.
        let r =
            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }
48623
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        // Fused a*b + c = 0.00000007 - 1.0: rounding modes differ in the last
        // ulp (-0.99999994 nearest vs -0.9999999 toward zero).
        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        // Writemask 0: every lane keeps src (`a`).
        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        // Lower eight mask bits: lanes 0..=7 take a*b + c, rest keep src.
        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        // Zeromask 0: every lane is zeroed.
        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        // Lower eight mask bits: lanes 0..=7 take a*b + c, rest are zeroed.
        #[rustfmt::skip]
        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        // In the mask3 variant `c` is the passthrough: mask 0 returns c.
        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        // Lower eight mask bits: lanes 0..=7 take a*b + c, rest keep c (-1.0).
        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48712
48713    #[simd_test(enable = "avx512f")]
48714    fn test_mm512_fmsub_round_ps() {
48715        let a = _mm512_set1_ps(0.00000007);
48716        let b = _mm512_set1_ps(1.);
48717        let c = _mm512_set1_ps(1.);
48718        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48719        let e = _mm512_set1_ps(-0.99999994);
48720        assert_eq_m512(r, e);
48721        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48722        let e = _mm512_set1_ps(-0.9999999);
48723        assert_eq_m512(r, e);
48724    }
48725
48726    #[simd_test(enable = "avx512f")]
48727    fn test_mm512_mask_fmsub_round_ps() {
48728        let a = _mm512_set1_ps(0.00000007);
48729        let b = _mm512_set1_ps(1.);
48730        let c = _mm512_set1_ps(1.);
48731        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48732            a, 0, b, c,
48733        );
48734        assert_eq_m512(r, a);
48735        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48736            a,
48737            0b00000000_11111111,
48738            b,
48739            c,
48740        );
48741        #[rustfmt::skip]
48742        let e = _mm512_setr_ps(
48743            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48744            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48745            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48746            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48747        );
48748        assert_eq_m512(r, e);
48749    }
48750
48751    #[simd_test(enable = "avx512f")]
48752    fn test_mm512_maskz_fmsub_round_ps() {
48753        let a = _mm512_set1_ps(0.00000007);
48754        let b = _mm512_set1_ps(1.);
48755        let c = _mm512_set1_ps(1.);
48756        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48757            0, a, b, c,
48758        );
48759        assert_eq_m512(r, _mm512_setzero_ps());
48760        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48761            0b00000000_11111111,
48762            a,
48763            b,
48764            c,
48765        );
48766        #[rustfmt::skip]
48767        let e = _mm512_setr_ps(
48768            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48769            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48770            0., 0., 0., 0.,
48771            0., 0., 0., 0.,
48772        );
48773        assert_eq_m512(r, e);
48774    }
48775
48776    #[simd_test(enable = "avx512f")]
48777    fn test_mm512_mask3_fmsub_round_ps() {
48778        let a = _mm512_set1_ps(0.00000007);
48779        let b = _mm512_set1_ps(1.);
48780        let c = _mm512_set1_ps(1.);
48781        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48782            a, b, c, 0,
48783        );
48784        assert_eq_m512(r, c);
48785        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48786            a,
48787            b,
48788            c,
48789            0b00000000_11111111,
48790        );
48791        #[rustfmt::skip]
48792        let e = _mm512_setr_ps(
48793            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48794            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48795            1., 1., 1., 1.,
48796            1., 1., 1., 1.,
48797        );
48798        assert_eq_m512(r, e);
48799    }
48800
48801    #[simd_test(enable = "avx512f")]
48802    fn test_mm512_fmaddsub_round_ps() {
48803        let a = _mm512_set1_ps(0.00000007);
48804        let b = _mm512_set1_ps(1.);
48805        let c = _mm512_set1_ps(-1.);
48806        let r =
48807            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48808        #[rustfmt::skip]
48809        let e = _mm512_setr_ps(
48810            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48811            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48812            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48813            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48814        );
48815        assert_eq_m512(r, e);
48816        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48817        let e = _mm512_setr_ps(
48818            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
48819            -0.9999999, 1., -0.9999999, 1., -0.9999999,
48820        );
48821        assert_eq_m512(r, e);
48822    }
48823
48824    #[simd_test(enable = "avx512f")]
48825    fn test_mm512_mask_fmaddsub_round_ps() {
48826        let a = _mm512_set1_ps(0.00000007);
48827        let b = _mm512_set1_ps(1.);
48828        let c = _mm512_set1_ps(-1.);
48829        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48830            a, 0, b, c,
48831        );
48832        assert_eq_m512(r, a);
48833        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48834            a,
48835            0b00000000_11111111,
48836            b,
48837            c,
48838        );
48839        #[rustfmt::skip]
48840        let e = _mm512_setr_ps(
48841            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48842            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48843            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48844            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48845        );
48846        assert_eq_m512(r, e);
48847    }
48848
48849    #[simd_test(enable = "avx512f")]
48850    fn test_mm512_maskz_fmaddsub_round_ps() {
48851        let a = _mm512_set1_ps(0.00000007);
48852        let b = _mm512_set1_ps(1.);
48853        let c = _mm512_set1_ps(-1.);
48854        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48855            0, a, b, c,
48856        );
48857        assert_eq_m512(r, _mm512_setzero_ps());
48858        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48859            0b00000000_11111111,
48860            a,
48861            b,
48862            c,
48863        );
48864        #[rustfmt::skip]
48865        let e = _mm512_setr_ps(
48866            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48867            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48868            0., 0., 0., 0.,
48869            0., 0., 0., 0.,
48870        );
48871        assert_eq_m512(r, e);
48872    }
48873
48874    #[simd_test(enable = "avx512f")]
48875    fn test_mm512_mask3_fmaddsub_round_ps() {
48876        let a = _mm512_set1_ps(0.00000007);
48877        let b = _mm512_set1_ps(1.);
48878        let c = _mm512_set1_ps(-1.);
48879        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48880            a, b, c, 0,
48881        );
48882        assert_eq_m512(r, c);
48883        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48884            a,
48885            b,
48886            c,
48887            0b00000000_11111111,
48888        );
48889        #[rustfmt::skip]
48890        let e = _mm512_setr_ps(
48891            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48892            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48893            -1., -1., -1., -1.,
48894            -1., -1., -1., -1.,
48895        );
48896        assert_eq_m512(r, e);
48897    }
48898
48899    #[simd_test(enable = "avx512f")]
48900    fn test_mm512_fmsubadd_round_ps() {
48901        let a = _mm512_set1_ps(0.00000007);
48902        let b = _mm512_set1_ps(1.);
48903        let c = _mm512_set1_ps(-1.);
48904        let r =
48905            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48906        #[rustfmt::skip]
48907        let e = _mm512_setr_ps(
48908            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48909            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48910            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48911            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48912        );
48913        assert_eq_m512(r, e);
48914        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48915        let e = _mm512_setr_ps(
48916            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
48917            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
48918        );
48919        assert_eq_m512(r, e);
48920    }
48921
48922    #[simd_test(enable = "avx512f")]
48923    fn test_mm512_mask_fmsubadd_round_ps() {
48924        let a = _mm512_set1_ps(0.00000007);
48925        let b = _mm512_set1_ps(1.);
48926        let c = _mm512_set1_ps(-1.);
48927        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48928            a, 0, b, c,
48929        );
48930        assert_eq_m512(r, a);
48931        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48932            a,
48933            0b00000000_11111111,
48934            b,
48935            c,
48936        );
48937        #[rustfmt::skip]
48938        let e = _mm512_setr_ps(
48939            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48940            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48941            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48942            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48943        );
48944        assert_eq_m512(r, e);
48945    }
48946
48947    #[simd_test(enable = "avx512f")]
48948    fn test_mm512_maskz_fmsubadd_round_ps() {
48949        let a = _mm512_set1_ps(0.00000007);
48950        let b = _mm512_set1_ps(1.);
48951        let c = _mm512_set1_ps(-1.);
48952        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48953            0, a, b, c,
48954        );
48955        assert_eq_m512(r, _mm512_setzero_ps());
48956        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48957            0b00000000_11111111,
48958            a,
48959            b,
48960            c,
48961        );
48962        #[rustfmt::skip]
48963        let e = _mm512_setr_ps(
48964            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48965            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48966            0., 0., 0., 0.,
48967            0., 0., 0., 0.,
48968        );
48969        assert_eq_m512(r, e);
48970    }
48971
48972    #[simd_test(enable = "avx512f")]
48973    fn test_mm512_mask3_fmsubadd_round_ps() {
48974        let a = _mm512_set1_ps(0.00000007);
48975        let b = _mm512_set1_ps(1.);
48976        let c = _mm512_set1_ps(-1.);
48977        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48978            a, b, c, 0,
48979        );
48980        assert_eq_m512(r, c);
48981        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48982            a,
48983            b,
48984            c,
48985            0b00000000_11111111,
48986        );
48987        #[rustfmt::skip]
48988        let e = _mm512_setr_ps(
48989            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48990            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48991            -1., -1., -1., -1.,
48992            -1., -1., -1., -1.,
48993        );
48994        assert_eq_m512(r, e);
48995    }
48996
48997    #[simd_test(enable = "avx512f")]
48998    fn test_mm512_fnmadd_round_ps() {
48999        let a = _mm512_set1_ps(0.00000007);
49000        let b = _mm512_set1_ps(1.);
49001        let c = _mm512_set1_ps(1.);
49002        let r =
49003            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
49004        let e = _mm512_set1_ps(0.99999994);
49005        assert_eq_m512(r, e);
49006        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
49007        let e = _mm512_set1_ps(0.9999999);
49008        assert_eq_m512(r, e);
49009    }
49010
49011    #[simd_test(enable = "avx512f")]
49012    fn test_mm512_mask_fnmadd_round_ps() {
49013        let a = _mm512_set1_ps(0.00000007);
49014        let b = _mm512_set1_ps(1.);
49015        let c = _mm512_set1_ps(1.);
49016        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49017            a, 0, b, c,
49018        );
49019        assert_eq_m512(r, a);
49020        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49021            a,
49022            0b00000000_11111111,
49023            b,
49024            c,
49025        );
49026        let e = _mm512_setr_ps(
49027            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49028            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
49029            0.00000007, 0.00000007,
49030        );
49031        assert_eq_m512(r, e);
49032    }
49033
49034    #[simd_test(enable = "avx512f")]
49035    fn test_mm512_maskz_fnmadd_round_ps() {
49036        let a = _mm512_set1_ps(0.00000007);
49037        let b = _mm512_set1_ps(1.);
49038        let c = _mm512_set1_ps(1.);
49039        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49040            0, a, b, c,
49041        );
49042        assert_eq_m512(r, _mm512_setzero_ps());
49043        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49044            0b00000000_11111111,
49045            a,
49046            b,
49047            c,
49048        );
49049        let e = _mm512_setr_ps(
49050            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49051            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
49052        );
49053        assert_eq_m512(r, e);
49054    }
49055
49056    #[simd_test(enable = "avx512f")]
49057    fn test_mm512_mask3_fnmadd_round_ps() {
49058        let a = _mm512_set1_ps(0.00000007);
49059        let b = _mm512_set1_ps(1.);
49060        let c = _mm512_set1_ps(1.);
49061        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49062            a, b, c, 0,
49063        );
49064        assert_eq_m512(r, c);
49065        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49066            a,
49067            b,
49068            c,
49069            0b00000000_11111111,
49070        );
49071        let e = _mm512_setr_ps(
49072            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49073            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
49074        );
49075        assert_eq_m512(r, e);
49076    }
49077
49078    #[simd_test(enable = "avx512f")]
49079    fn test_mm512_fnmsub_round_ps() {
49080        let a = _mm512_set1_ps(0.00000007);
49081        let b = _mm512_set1_ps(1.);
49082        let c = _mm512_set1_ps(-1.);
49083        let r =
49084            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
49085        let e = _mm512_set1_ps(0.99999994);
49086        assert_eq_m512(r, e);
49087        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
49088        let e = _mm512_set1_ps(0.9999999);
49089        assert_eq_m512(r, e);
49090    }
49091
49092    #[simd_test(enable = "avx512f")]
49093    fn test_mm512_mask_fnmsub_round_ps() {
49094        let a = _mm512_set1_ps(0.00000007);
49095        let b = _mm512_set1_ps(1.);
49096        let c = _mm512_set1_ps(-1.);
49097        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49098            a, 0, b, c,
49099        );
49100        assert_eq_m512(r, a);
49101        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49102            a,
49103            0b00000000_11111111,
49104            b,
49105            c,
49106        );
49107        let e = _mm512_setr_ps(
49108            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49109            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
49110            0.00000007, 0.00000007,
49111        );
49112        assert_eq_m512(r, e);
49113    }
49114
49115    #[simd_test(enable = "avx512f")]
49116    fn test_mm512_maskz_fnmsub_round_ps() {
49117        let a = _mm512_set1_ps(0.00000007);
49118        let b = _mm512_set1_ps(1.);
49119        let c = _mm512_set1_ps(-1.);
49120        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49121            0, a, b, c,
49122        );
49123        assert_eq_m512(r, _mm512_setzero_ps());
49124        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49125            0b00000000_11111111,
49126            a,
49127            b,
49128            c,
49129        );
49130        let e = _mm512_setr_ps(
49131            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49132            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
49133        );
49134        assert_eq_m512(r, e);
49135    }
49136
49137    #[simd_test(enable = "avx512f")]
49138    fn test_mm512_mask3_fnmsub_round_ps() {
49139        let a = _mm512_set1_ps(0.00000007);
49140        let b = _mm512_set1_ps(1.);
49141        let c = _mm512_set1_ps(-1.);
49142        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49143            a, b, c, 0,
49144        );
49145        assert_eq_m512(r, c);
49146        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49147            a,
49148            b,
49149            c,
49150            0b00000000_11111111,
49151        );
49152        let e = _mm512_setr_ps(
49153            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49154            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
49155        );
49156        assert_eq_m512(r, e);
49157    }
49158
49159    #[simd_test(enable = "avx512f")]
49160    fn test_mm512_max_round_ps() {
49161        let a = _mm512_setr_ps(
49162            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49163        );
49164        let b = _mm512_setr_ps(
49165            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49166        );
49167        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
49168        let e = _mm512_setr_ps(
49169            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
49170        );
49171        assert_eq_m512(r, e);
49172    }
49173
49174    #[simd_test(enable = "avx512f")]
49175    fn test_mm512_mask_max_round_ps() {
49176        let a = _mm512_setr_ps(
49177            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49178        );
49179        let b = _mm512_setr_ps(
49180            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49181        );
49182        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
49183        assert_eq_m512(r, a);
49184        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
49185        let e = _mm512_setr_ps(
49186            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
49187        );
49188        assert_eq_m512(r, e);
49189    }
49190
49191    #[simd_test(enable = "avx512f")]
49192    fn test_mm512_maskz_max_round_ps() {
49193        let a = _mm512_setr_ps(
49194            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49195        );
49196        let b = _mm512_setr_ps(
49197            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49198        );
49199        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
49200        assert_eq_m512(r, _mm512_setzero_ps());
49201        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
49202        let e = _mm512_setr_ps(
49203            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
49204        );
49205        assert_eq_m512(r, e);
49206    }
49207
49208    #[simd_test(enable = "avx512f")]
49209    fn test_mm512_min_round_ps() {
49210        let a = _mm512_setr_ps(
49211            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49212        );
49213        let b = _mm512_setr_ps(
49214            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49215        );
49216        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
49217        let e = _mm512_setr_ps(
49218            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
49219        );
49220        assert_eq_m512(r, e);
49221    }
49222
49223    #[simd_test(enable = "avx512f")]
49224    fn test_mm512_mask_min_round_ps() {
49225        let a = _mm512_setr_ps(
49226            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49227        );
49228        let b = _mm512_setr_ps(
49229            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49230        );
49231        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
49232        assert_eq_m512(r, a);
49233        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
49234        let e = _mm512_setr_ps(
49235            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49236        );
49237        assert_eq_m512(r, e);
49238    }
49239
49240    #[simd_test(enable = "avx512f")]
49241    fn test_mm512_maskz_min_round_ps() {
49242        let a = _mm512_setr_ps(
49243            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49244        );
49245        let b = _mm512_setr_ps(
49246            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49247        );
49248        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
49249        assert_eq_m512(r, _mm512_setzero_ps());
49250        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
49251        let e = _mm512_setr_ps(
49252            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
49253        );
49254        assert_eq_m512(r, e);
49255    }
49256
49257    #[simd_test(enable = "avx512f")]
49258    fn test_mm512_getexp_round_ps() {
49259        let a = _mm512_set1_ps(3.);
49260        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
49261        let e = _mm512_set1_ps(1.);
49262        assert_eq_m512(r, e);
49263    }
49264
49265    #[simd_test(enable = "avx512f")]
49266    fn test_mm512_mask_getexp_round_ps() {
49267        let a = _mm512_set1_ps(3.);
49268        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
49269        assert_eq_m512(r, a);
49270        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
49271        let e = _mm512_setr_ps(
49272            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
49273        );
49274        assert_eq_m512(r, e);
49275    }
49276
49277    #[simd_test(enable = "avx512f")]
49278    fn test_mm512_maskz_getexp_round_ps() {
49279        let a = _mm512_set1_ps(3.);
49280        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
49281        assert_eq_m512(r, _mm512_setzero_ps());
49282        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
49283        let e = _mm512_setr_ps(
49284            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
49285        );
49286        assert_eq_m512(r, e);
49287    }
49288
49289    #[simd_test(enable = "avx512f")]
49290    fn test_mm512_roundscale_round_ps() {
49291        let a = _mm512_set1_ps(1.1);
49292        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
49293        let e = _mm512_set1_ps(1.0);
49294        assert_eq_m512(r, e);
49295    }
49296
49297    #[simd_test(enable = "avx512f")]
49298    fn test_mm512_mask_roundscale_round_ps() {
49299        let a = _mm512_set1_ps(1.1);
49300        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
49301        let e = _mm512_set1_ps(1.1);
49302        assert_eq_m512(r, e);
49303        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
49304            a,
49305            0b11111111_11111111,
49306            a,
49307        );
49308        let e = _mm512_set1_ps(1.0);
49309        assert_eq_m512(r, e);
49310    }
49311
49312    #[simd_test(enable = "avx512f")]
49313    fn test_mm512_maskz_roundscale_round_ps() {
49314        let a = _mm512_set1_ps(1.1);
49315        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
49316        assert_eq_m512(r, _mm512_setzero_ps());
49317        let r =
49318            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
49319        let e = _mm512_set1_ps(1.0);
49320        assert_eq_m512(r, e);
49321    }
49322
49323    #[simd_test(enable = "avx512f")]
49324    fn test_mm512_scalef_round_ps() {
49325        let a = _mm512_set1_ps(1.);
49326        let b = _mm512_set1_ps(3.);
49327        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
49328        let e = _mm512_set1_ps(8.);
49329        assert_eq_m512(r, e);
49330    }
49331
49332    #[simd_test(enable = "avx512f")]
49333    fn test_mm512_mask_scalef_round_ps() {
49334        let a = _mm512_set1_ps(1.);
49335        let b = _mm512_set1_ps(3.);
49336        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49337            a, 0, a, b,
49338        );
49339        assert_eq_m512(r, a);
49340        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49341            a,
49342            0b11111111_00000000,
49343            a,
49344            b,
49345        );
49346        let e = _mm512_set_ps(
49347            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
49348        );
49349        assert_eq_m512(r, e);
49350    }
49351
49352    #[simd_test(enable = "avx512f")]
49353    fn test_mm512_maskz_scalef_round_ps() {
49354        let a = _mm512_set1_ps(1.);
49355        let b = _mm512_set1_ps(3.);
49356        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49357            0, a, b,
49358        );
49359        assert_eq_m512(r, _mm512_setzero_ps());
49360        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49361            0b11111111_00000000,
49362            a,
49363            b,
49364        );
49365        let e = _mm512_set_ps(
49366            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
49367        );
49368        assert_eq_m512(r, e);
49369    }
49370
49371    #[simd_test(enable = "avx512f")]
49372    fn test_mm512_fixupimm_round_ps() {
49373        let a = _mm512_set1_ps(f32::NAN);
49374        let b = _mm512_set1_ps(f32::MAX);
49375        let c = _mm512_set1_epi32(i32::MAX);
49376        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
49377        let e = _mm512_set1_ps(0.0);
49378        assert_eq_m512(r, e);
49379    }
49380
49381    #[simd_test(enable = "avx512f")]
49382    fn test_mm512_mask_fixupimm_round_ps() {
49383        #[rustfmt::skip]
49384        let a = _mm512_set_ps(
49385            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49386            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49387            1., 1., 1., 1.,
49388            1., 1., 1., 1.,
49389        );
49390        let b = _mm512_set1_ps(f32::MAX);
49391        let c = _mm512_set1_epi32(i32::MAX);
49392        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
49393            a,
49394            0b11111111_00000000,
49395            b,
49396            c,
49397        );
49398        let e = _mm512_set_ps(
49399            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
49400        );
49401        assert_eq_m512(r, e);
49402    }
49403
49404    #[simd_test(enable = "avx512f")]
49405    fn test_mm512_maskz_fixupimm_round_ps() {
49406        #[rustfmt::skip]
49407        let a = _mm512_set_ps(
49408            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49409            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49410            1., 1., 1., 1.,
49411            1., 1., 1., 1.,
49412        );
49413        let b = _mm512_set1_ps(f32::MAX);
49414        let c = _mm512_set1_epi32(i32::MAX);
49415        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
49416            0b11111111_00000000,
49417            a,
49418            b,
49419            c,
49420        );
49421        let e = _mm512_set_ps(
49422            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
49423        );
49424        assert_eq_m512(r, e);
49425    }
49426
    // getmant with the 1..2 normalization interval: 10.0 = 1.25 * 2^3, so the
    // extracted mantissa of every lane is 1.25 (sign taken from the source).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a);
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }

    // Merge-masking: an all-zero mask leaves `src` (here also `a`) untouched;
    // mask 0xFF00 computes the mantissa only in lanes 8..15 (note `setr` lists
    // lane 0 first, so those lanes are the trailing 1.25 values).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masking: an all-zero mask yields all zeros; mask 0xFF00 computes the
    // mantissa in lanes 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
49478
    // f32 -> i32 conversion under the current rounding mode; the expected values
    // (-3.5 -> -4, 9.5 -> 10, 15.5 -> 16) reflect the default round-to-nearest-even.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epi32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }

    // Merge-masking: all-zero mask returns `src`; mask 0x00FF converts only
    // lanes 0..7 and keeps `src` (zeros) in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Zero-masking: all-zero mask yields all zeros; mask 0x00FF converts
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge-masking variant (AVX512VL): zero mask -> src, full mask ->
    // every lane converted with round-to-nearest-even.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant: zero mask -> all zeros, full mask -> all
    // lanes converted.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge-masking variant: zero mask -> src, full mask -> all 4 lanes
    // converted.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant: zero mask -> all zeros, full mask -> all 4
    // lanes converted.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49555
    // f32 -> u32 conversion: negative inputs are out of range for u32 and the
    // expected result is all-ones (0xFFFF_FFFF, printed as -1 through the signed
    // helpers); in-range values round to nearest-even.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epu32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }

    // Merge-masking: zero mask -> src; mask 0x00FF converts lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF converts lanes 0..7 and
    // zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit unmasked variant (AVX512VL); positive inputs only, rounded to
    // nearest-even.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_cvtps_epu32(a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking variant: zero mask -> src, full mask -> all lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epu32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant: zero mask -> all zeros, full mask -> all lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epu32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    // 128-bit unmasked variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_cvtps_epu32(a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking variant: zero mask -> src, full mask -> all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epu32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant: zero mask -> all zeros, full mask -> all 4
    // lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epu32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49648
    // Sign-extend 16 i8 lanes to i32 (all inputs here are non-negative, so the
    // values pass through unchanged).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Merge-masking: zero mask -> src (-1 everywhere); mask 0x00FF widens only
    // lanes 0..7 (the trailing set_epi8 arguments 8..15).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF widens lanes 0..7 and
    // zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge-masking variant: widens the low 8 bytes of `a` (values 8..15).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepi8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge-masking variant: widens the low 4 bytes (values 12..15).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepi8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49719
    // Zero-extend 16 u8 lanes to i32 (mirrors the cvtepi8 tests above; the
    // non-negative inputs make sign- and zero-extension indistinguishable here).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Merge-masking: zero mask -> src; mask 0x00FF widens lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF widens lanes 0..7 and
    // zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge-masking variant: widens the low 8 bytes (values 8..15).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge-masking variant: widens the low 4 bytes (values 12..15).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49790
    // Sign-extend 16 i16 lanes to i32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Merge-masking: zero mask -> src; mask 0x00FF widens lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF widens lanes 0..7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge-masking variant: all 8 i16 lanes widened under a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge-masking variant: widens the low 4 i16 lanes (values 4..7).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_cvtepi16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
49861
    // Zero-extend 16 u16 lanes to i32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Merge-masking: zero mask -> src; mask 0x00FF widens lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF widens lanes 0..7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge-masking variant under a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge-masking variant: widens the low 4 u16 lanes (values 12..15).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49932
    // Signed i32 -> f32 conversion; all values here are exactly representable.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Merge-masking: zero mask -> src (-1.0 everywhere); mask 0x00FF converts
    // lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF converts lanes 0..7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit merge-masking variant under a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm256_set1_ps(-1.);
        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtepi32_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // 128-bit merge-masking variant under a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let src = _mm_set1_ps(-1.);
        let r = _mm_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_cvtepi32_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
50009
    // Unsigned u32 -> f32 conversion; the small non-negative inputs convert to
    // the same values as the signed test above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Merge-masking: zero mask -> src; mask 0x00FF converts lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF converts lanes 0..7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50044
    // Narrow 16 i32 lanes to i16 by truncation (vpmovdw); all values here fit,
    // so they pass through unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // Merge-masking: zero mask -> src; mask 0x00FF narrows lanes 0..7 only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // Zero-masking: zero mask -> all zeros; mask 0x00FF narrows lanes 0..7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    // 256-bit -> 128-bit unmasked narrowing (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // 256-bit merge-masking variant under a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // 128-bit narrowing: only 4 results are produced, so the upper 4 i16 lanes
    // of the 128-bit destination are zeroed (the leading zeros in `e`).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking variant; upper half of the destination stays zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50131
50132    #[simd_test(enable = "avx512f")]
50133    const fn test_mm512_cvtepi32_epi8() {
50134        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50135        let r = _mm512_cvtepi32_epi8(a);
50136        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50137        assert_eq_m128i(r, e);
50138    }
50139
50140    #[simd_test(enable = "avx512f")]
50141    const fn test_mm512_mask_cvtepi32_epi8() {
50142        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50143        let src = _mm_set1_epi8(-1);
50144        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
50145        assert_eq_m128i(r, src);
50146        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
50147        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
50148        assert_eq_m128i(r, e);
50149    }
50150
    // Zero-masked i32->i8 truncation: masked-off lanes become zero instead of `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
50160
    // 256-bit i32->i8 truncation: 8 source lanes fill the low 8 bytes; high 8 bytes are zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtepi32_epi8() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50168
    // Masked 256-bit i32->i8 truncation: zero mask keeps `src`; full 8-lane mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_epi8() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50179
    // Zero-masked 256-bit i32->i8 truncation: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtepi32_epi8() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50189
    // 128-bit i32->i8 truncation: 4 lanes fill the low 4 bytes; the other 12 are zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtepi32_epi8() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50197
    // Masked 128-bit i32->i8 truncation: zero mask keeps `src`; 4-lane mask converts all lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi8() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50208
    // Zero-masked 128-bit i32->i8 truncation: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi8() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50218
    // Signed-saturating i32->i16 narrowing: i32::MIN/MAX clamp to i16::MIN/MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50238
    // Masked saturating i32->i16: zero mask keeps `src` (-1 fill); low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50261
    // Zero-masked saturating i32->i16: masked-off lanes become zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50283
    // 256-bit saturating i32->i16: values fit in i16, so the result is a plain narrow.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtsepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtsepi32_epi16(a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50291
    // Masked 256-bit saturating i32->i16: zero mask keeps `src`; full 8-lane mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50302
    // Zero-masked 256-bit saturating i32->i16: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50312
    // 128-bit saturating i32->i16: 4 lanes land in the low half; upper half is zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtsepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtsepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50320
50321    #[simd_test(enable = "avx512f,avx512vl")]
50322    fn test_mm_mask_cvtsepi32_epi16() {
50323        let a = _mm_set_epi32(4, 5, 6, 7);
50324        let src = _mm_set1_epi16(0);
50325        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
50326        assert_eq_m128i(r, src);
50327        let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
50328        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
50329        assert_eq_m128i(r, e);
50330    }
50331
50332    #[simd_test(enable = "avx512f,avx512vl")]
50333    fn test_mm_maskz_cvtsepi32_epi16() {
50334        let a = _mm_set_epi32(4, 5, 6, 7);
50335        let r = _mm_maskz_cvtsepi32_epi16(0, a);
50336        assert_eq_m128i(r, _mm_setzero_si128());
50337        let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
50338        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
50339        assert_eq_m128i(r, e);
50340    }
50341
    // Signed-saturating i32->i8 narrowing: i32::MIN/MAX clamp to i8::MIN/MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50361
    // Masked saturating i32->i8: zero mask keeps `src` (-1 fill); low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50384
    // Zero-masked saturating i32->i8: masked-off lanes become zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50406
    // 256-bit saturating i32->i8: 8 in-range lanes narrow into the low 8 bytes; upper 8 are zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtsepi32_epi8() {
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50420
    // Masked 256-bit saturating i32->i8: zero mask keeps `src`; full 8-lane mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi8() {
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50437
    // Zero-masked 256-bit saturating i32->i8: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi8() {
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50453
    // 128-bit saturating i32->i8: 4 in-range lanes narrow into the low 4 bytes; rest are zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtsepi32_epi8() {
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50467
    // Masked 128-bit saturating i32->i8: zero mask keeps `src`; 4-lane mask converts all lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi8() {
        let a = _mm_set_epi32(13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50484
    // Zero-masked 128-bit saturating i32->i8: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi8() {
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50500
    // Unsigned-saturating i32->u16: i32::MIN reinterpreted as 0x8000_0000 clamps to u16::MAX (-1 as i16).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50514
    // Masked unsigned-saturating i32->u16: zero mask keeps `src`; low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50531
    // Zero-masked unsigned-saturating i32->u16: masked-off lanes become zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50547
    // 256-bit unsigned-saturating i32->u16: all values fit, so plain narrowing.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtusepi32_epi16() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtusepi32_epi16(a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50555
    // Masked 256-bit unsigned-saturating i32->u16: zero mask keeps `src`; full mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi16() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50566
    // Zero-masked 256-bit unsigned-saturating i32->u16: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi16() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50576
    // 128-bit unsigned-saturating i32->u16: 4 lanes land in the low half; upper half is zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtusepi32_epi16() {
        let a = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_cvtusepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50584
    // Masked 128-bit unsigned-saturating i32->u16: zero mask keeps `src`; 4-lane mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi16() {
        let a = _mm_set_epi32(5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50595
    // Zero-masked 128-bit unsigned-saturating i32->u16: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi16() {
        let a = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50605
    // Unsigned-saturating i32->u8: i32::MIN as unsigned (0x8000_0000) clamps to u8::MAX (-1 as i8).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50619
    // Masked unsigned-saturating i32->u8: zero mask keeps `src`; low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50636
    // Zero-masked unsigned-saturating i32->u8: masked-off lanes become zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50652
    // 256-bit unsigned-saturating i32->u8: i32::MAX clamps to u8::MAX; upper 8 bytes are zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtusepi32_epi8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let r = _mm256_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50660
    // Masked 256-bit unsigned-saturating i32->u8: zero mask keeps `src`; full mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50671
    // Zero-masked 256-bit unsigned-saturating i32->u8: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50681
    // 128-bit unsigned-saturating i32->u8: i32::MAX clamps to u8::MAX; only low 4 bytes populated.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtusepi32_epi8() {
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let r = _mm_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50689
    // Masked 128-bit unsigned-saturating i32->u8: zero mask keeps `src`; 4-lane mask converts all.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi8() {
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50700
    // Zero-masked 128-bit unsigned-saturating i32->u8: zero mask yields all-zero result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi8() {
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let r = _mm_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50710
    // f32->i32 with explicit rounding: nearest-even rounds .5 away from/to even; NEG_INF floors.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        // Round-to-nearest-even: -1.5 -> -2, 9.5 -> 10, etc.
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        // Floor: 9.5 -> 9, 11.5 -> 11, etc.
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
50723
    // Masked f32->i32 with rounding: zero mask keeps `src`; low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50742
    // Zero-masked f32->i32 with rounding: masked-off lanes become zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50759
    // f32->u32 with rounding: negative inputs are out of range and produce 0xFFFF_FFFF (-1 as i32).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
50772
    // Masked f32->u32 with rounding: zero mask keeps `src`; low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50791
    // Zero-masked f32->u32 with rounding: masked-off lanes become zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50808
    // i32->f32 with rounding control: small integers convert exactly regardless of mode.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
50818
    // Masked i32->f32 with rounding: zero mask keeps `src`; low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50837
    // Zero-masked i32->f32 with rounding: masked-off lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50854
    // u32->f32 with rounding: -2 reinterpreted as u32 (4294967294) rounds to the nearest f32,
    // written here as the literal 4294967300. (which is 4294967296.0 as f32).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            8., 10., 10., 12.,
            12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
50868
    // Masked u32->f32 with rounding: zero mask keeps `src`; low-half mask converts lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50891
    // Zero-masked u32->f32 with rounding: masked-off lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50912
    // f32->f16 conversion: 1.0f16 is 0x3C00, so four packed halves per i64 lane
    // give the constant 0x3C00_3C00_3C00_3C00 = 4323521613979991040.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }
50925
    // Masked f32->f16: zero mask keeps `src`; low-half mask fills only the low two i64 lanes.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
50936
    // Zero-masked f32->f16: zero mask yields all-zero; low-half mask fills the low two i64 lanes.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
50946
50947    #[simd_test(enable = "avx512f,avx512vl")]
50948    fn test_mm256_mask_cvt_roundps_ph() {
50949        let a = _mm256_set1_ps(1.);
50950        let src = _mm_set1_epi16(0);
50951        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
50952        assert_eq_m128i(r, src);
50953        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
50954        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
50955        assert_eq_m128i(r, e);
50956    }
50957
50958    #[simd_test(enable = "avx512f,avx512vl")]
50959    fn test_mm256_maskz_cvt_roundps_ph() {
50960        let a = _mm256_set1_ps(1.);
50961        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
50962        assert_eq_m128i(r, _mm_setzero_si128());
50963        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
50964        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
50965        assert_eq_m128i(r, e);
50966    }
50967
50968    #[simd_test(enable = "avx512f,avx512vl")]
50969    fn test_mm_mask_cvt_roundps_ph() {
50970        let a = _mm_set1_ps(1.);
50971        let src = _mm_set1_epi16(0);
50972        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
50973        assert_eq_m128i(r, src);
50974        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
50975        let e = _mm_setr_epi64x(4323521613979991040, 0);
50976        assert_eq_m128i(r, e);
50977    }
50978
50979    #[simd_test(enable = "avx512f,avx512vl")]
50980    fn test_mm_maskz_cvt_roundps_ph() {
50981        let a = _mm_set1_ps(1.);
50982        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
50983        assert_eq_m128i(r, _mm_setzero_si128());
50984        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
50985        let e = _mm_setr_epi64x(4323521613979991040, 0);
50986        assert_eq_m128i(r, e);
50987    }
50988
50989    #[simd_test(enable = "avx512f")]
50990    fn test_mm512_cvtps_ph() {
50991        let a = _mm512_set1_ps(1.);
50992        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
50993        let e = _mm256_setr_epi64x(
50994            4323521613979991040,
50995            4323521613979991040,
50996            4323521613979991040,
50997            4323521613979991040,
50998        );
50999        assert_eq_m256i(r, e);
51000    }
51001
51002    #[simd_test(enable = "avx512f")]
51003    fn test_mm512_mask_cvtps_ph() {
51004        let a = _mm512_set1_ps(1.);
51005        let src = _mm256_set1_epi16(0);
51006        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
51007        assert_eq_m256i(r, src);
51008        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51009        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
51010        assert_eq_m256i(r, e);
51011    }
51012
51013    #[simd_test(enable = "avx512f")]
51014    fn test_mm512_maskz_cvtps_ph() {
51015        let a = _mm512_set1_ps(1.);
51016        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
51017        assert_eq_m256i(r, _mm256_setzero_si256());
51018        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51019        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
51020        assert_eq_m256i(r, e);
51021    }
51022
51023    #[simd_test(enable = "avx512f,avx512vl")]
51024    fn test_mm256_mask_cvtps_ph() {
51025        let a = _mm256_set1_ps(1.);
51026        let src = _mm_set1_epi16(0);
51027        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
51028        assert_eq_m128i(r, src);
51029        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
51030        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51031        assert_eq_m128i(r, e);
51032    }
51033
51034    #[simd_test(enable = "avx512f,avx512vl")]
51035    fn test_mm256_maskz_cvtps_ph() {
51036        let a = _mm256_set1_ps(1.);
51037        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
51038        assert_eq_m128i(r, _mm_setzero_si128());
51039        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
51040        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51041        assert_eq_m128i(r, e);
51042    }
51043
51044    #[simd_test(enable = "avx512f,avx512vl")]
51045    fn test_mm_mask_cvtps_ph() {
51046        let a = _mm_set1_ps(1.);
51047        let src = _mm_set1_epi16(0);
51048        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
51049        assert_eq_m128i(r, src);
51050        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
51051        let e = _mm_setr_epi64x(4323521613979991040, 0);
51052        assert_eq_m128i(r, e);
51053    }
51054
51055    #[simd_test(enable = "avx512f,avx512vl")]
51056    fn test_mm_maskz_cvtps_ph() {
51057        let a = _mm_set1_ps(1.);
51058        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
51059        assert_eq_m128i(r, _mm_setzero_si128());
51060        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
51061        let e = _mm_setr_epi64x(4323521613979991040, 0);
51062        assert_eq_m128i(r, e);
51063    }
51064
51065    #[simd_test(enable = "avx512f")]
51066    fn test_mm512_cvt_roundph_ps() {
51067        let a = _mm256_setr_epi64x(
51068            4323521613979991040,
51069            4323521613979991040,
51070            4323521613979991040,
51071            4323521613979991040,
51072        );
51073        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
51074        let e = _mm512_set1_ps(1.);
51075        assert_eq_m512(r, e);
51076    }
51077
51078    #[simd_test(enable = "avx512f")]
51079    fn test_mm512_mask_cvt_roundph_ps() {
51080        let a = _mm256_setr_epi64x(
51081            4323521613979991040,
51082            4323521613979991040,
51083            4323521613979991040,
51084            4323521613979991040,
51085        );
51086        let src = _mm512_set1_ps(0.);
51087        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
51088        assert_eq_m512(r, src);
51089        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51090        let e = _mm512_setr_ps(
51091            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51092        );
51093        assert_eq_m512(r, e);
51094    }
51095
51096    #[simd_test(enable = "avx512f")]
51097    fn test_mm512_maskz_cvt_roundph_ps() {
51098        let a = _mm256_setr_epi64x(
51099            4323521613979991040,
51100            4323521613979991040,
51101            4323521613979991040,
51102            4323521613979991040,
51103        );
51104        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
51105        assert_eq_m512(r, _mm512_setzero_ps());
51106        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51107        let e = _mm512_setr_ps(
51108            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51109        );
51110        assert_eq_m512(r, e);
51111    }
51112
51113    #[simd_test(enable = "avx512f")]
51114    fn test_mm512_cvtph_ps() {
51115        let a = _mm256_setr_epi64x(
51116            4323521613979991040,
51117            4323521613979991040,
51118            4323521613979991040,
51119            4323521613979991040,
51120        );
51121        let r = _mm512_cvtph_ps(a);
51122        let e = _mm512_set1_ps(1.);
51123        assert_eq_m512(r, e);
51124    }
51125
51126    #[simd_test(enable = "avx512f")]
51127    fn test_mm512_mask_cvtph_ps() {
51128        let a = _mm256_setr_epi64x(
51129            4323521613979991040,
51130            4323521613979991040,
51131            4323521613979991040,
51132            4323521613979991040,
51133        );
51134        let src = _mm512_set1_ps(0.);
51135        let r = _mm512_mask_cvtph_ps(src, 0, a);
51136        assert_eq_m512(r, src);
51137        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
51138        let e = _mm512_setr_ps(
51139            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51140        );
51141        assert_eq_m512(r, e);
51142    }
51143
51144    #[simd_test(enable = "avx512f")]
51145    fn test_mm512_maskz_cvtph_ps() {
51146        let a = _mm256_setr_epi64x(
51147            4323521613979991040,
51148            4323521613979991040,
51149            4323521613979991040,
51150            4323521613979991040,
51151        );
51152        let r = _mm512_maskz_cvtph_ps(0, a);
51153        assert_eq_m512(r, _mm512_setzero_ps());
51154        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
51155        let e = _mm512_setr_ps(
51156            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51157        );
51158        assert_eq_m512(r, e);
51159    }
51160
51161    #[simd_test(enable = "avx512f,avx512vl")]
51162    fn test_mm256_mask_cvtph_ps() {
51163        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51164        let src = _mm256_set1_ps(0.);
51165        let r = _mm256_mask_cvtph_ps(src, 0, a);
51166        assert_eq_m256(r, src);
51167        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
51168        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
51169        assert_eq_m256(r, e);
51170    }
51171
51172    #[simd_test(enable = "avx512f,avx512vl")]
51173    fn test_mm256_maskz_cvtph_ps() {
51174        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51175        let r = _mm256_maskz_cvtph_ps(0, a);
51176        assert_eq_m256(r, _mm256_setzero_ps());
51177        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
51178        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
51179        assert_eq_m256(r, e);
51180    }
51181
51182    #[simd_test(enable = "avx512f,avx512vl")]
51183    fn test_mm_mask_cvtph_ps() {
51184        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51185        let src = _mm_set1_ps(0.);
51186        let r = _mm_mask_cvtph_ps(src, 0, a);
51187        assert_eq_m128(r, src);
51188        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
51189        let e = _mm_setr_ps(1., 1., 1., 1.);
51190        assert_eq_m128(r, e);
51191    }
51192
51193    #[simd_test(enable = "avx512f,avx512vl")]
51194    fn test_mm_maskz_cvtph_ps() {
51195        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51196        let r = _mm_maskz_cvtph_ps(0, a);
51197        assert_eq_m128(r, _mm_setzero_ps());
51198        let r = _mm_maskz_cvtph_ps(0b00001111, a);
51199        let e = _mm_setr_ps(1., 1., 1., 1.);
51200        assert_eq_m128(r, e);
51201    }
51202
51203    #[simd_test(enable = "avx512f")]
51204    fn test_mm512_cvtt_roundps_epi32() {
51205        let a = _mm512_setr_ps(
51206            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51207        );
51208        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
51209        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
51210        assert_eq_m512i(r, e);
51211    }
51212
51213    #[simd_test(enable = "avx512f")]
51214    fn test_mm512_mask_cvtt_roundps_epi32() {
51215        let a = _mm512_setr_ps(
51216            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51217        );
51218        let src = _mm512_set1_epi32(0);
51219        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
51220        assert_eq_m512i(r, src);
51221        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51222        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51223        assert_eq_m512i(r, e);
51224    }
51225
51226    #[simd_test(enable = "avx512f")]
51227    fn test_mm512_maskz_cvtt_roundps_epi32() {
51228        let a = _mm512_setr_ps(
51229            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51230        );
51231        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
51232        assert_eq_m512i(r, _mm512_setzero_si512());
51233        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51234        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51235        assert_eq_m512i(r, e);
51236    }
51237
51238    #[simd_test(enable = "avx512f")]
51239    fn test_mm512_cvtt_roundps_epu32() {
51240        let a = _mm512_setr_ps(
51241            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51242        );
51243        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
51244        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
51245        assert_eq_m512i(r, e);
51246    }
51247
51248    #[simd_test(enable = "avx512f")]
51249    fn test_mm512_mask_cvtt_roundps_epu32() {
51250        let a = _mm512_setr_ps(
51251            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51252        );
51253        let src = _mm512_set1_epi32(0);
51254        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
51255        assert_eq_m512i(r, src);
51256        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51257        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51258        assert_eq_m512i(r, e);
51259    }
51260
51261    #[simd_test(enable = "avx512f")]
51262    fn test_mm512_maskz_cvtt_roundps_epu32() {
51263        let a = _mm512_setr_ps(
51264            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51265        );
51266        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
51267        assert_eq_m512i(r, _mm512_setzero_si512());
51268        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51269        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51270        assert_eq_m512i(r, e);
51271    }
51272
51273    #[simd_test(enable = "avx512f")]
51274    fn test_mm512_cvttps_epi32() {
51275        let a = _mm512_setr_ps(
51276            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51277        );
51278        let r = _mm512_cvttps_epi32(a);
51279        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
51280        assert_eq_m512i(r, e);
51281    }
51282
51283    #[simd_test(enable = "avx512f")]
51284    fn test_mm512_mask_cvttps_epi32() {
51285        let a = _mm512_setr_ps(
51286            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51287        );
51288        let src = _mm512_set1_epi32(0);
51289        let r = _mm512_mask_cvttps_epi32(src, 0, a);
51290        assert_eq_m512i(r, src);
51291        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
51292        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51293        assert_eq_m512i(r, e);
51294    }
51295
51296    #[simd_test(enable = "avx512f")]
51297    fn test_mm512_maskz_cvttps_epi32() {
51298        let a = _mm512_setr_ps(
51299            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51300        );
51301        let r = _mm512_maskz_cvttps_epi32(0, a);
51302        assert_eq_m512i(r, _mm512_setzero_si512());
51303        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
51304        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51305        assert_eq_m512i(r, e);
51306    }
51307
51308    #[simd_test(enable = "avx512f,avx512vl")]
51309    fn test_mm256_mask_cvttps_epi32() {
51310        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51311        let src = _mm256_set1_epi32(0);
51312        let r = _mm256_mask_cvttps_epi32(src, 0, a);
51313        assert_eq_m256i(r, src);
51314        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
51315        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51316        assert_eq_m256i(r, e);
51317    }
51318
51319    #[simd_test(enable = "avx512f,avx512vl")]
51320    fn test_mm256_maskz_cvttps_epi32() {
51321        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51322        let r = _mm256_maskz_cvttps_epi32(0, a);
51323        assert_eq_m256i(r, _mm256_setzero_si256());
51324        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
51325        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51326        assert_eq_m256i(r, e);
51327    }
51328
51329    #[simd_test(enable = "avx512f,avx512vl")]
51330    fn test_mm_mask_cvttps_epi32() {
51331        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51332        let src = _mm_set1_epi32(0);
51333        let r = _mm_mask_cvttps_epi32(src, 0, a);
51334        assert_eq_m128i(r, src);
51335        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
51336        let e = _mm_set_epi32(12, 13, 14, 15);
51337        assert_eq_m128i(r, e);
51338    }
51339
51340    #[simd_test(enable = "avx512f,avx512vl")]
51341    fn test_mm_maskz_cvttps_epi32() {
51342        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51343        let r = _mm_maskz_cvttps_epi32(0, a);
51344        assert_eq_m128i(r, _mm_setzero_si128());
51345        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
51346        let e = _mm_set_epi32(12, 13, 14, 15);
51347        assert_eq_m128i(r, e);
51348    }
51349
51350    #[simd_test(enable = "avx512f")]
51351    fn test_mm512_cvttps_epu32() {
51352        let a = _mm512_setr_ps(
51353            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51354        );
51355        let r = _mm512_cvttps_epu32(a);
51356        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
51357        assert_eq_m512i(r, e);
51358    }
51359
51360    #[simd_test(enable = "avx512f")]
51361    fn test_mm512_mask_cvttps_epu32() {
51362        let a = _mm512_setr_ps(
51363            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51364        );
51365        let src = _mm512_set1_epi32(0);
51366        let r = _mm512_mask_cvttps_epu32(src, 0, a);
51367        assert_eq_m512i(r, src);
51368        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
51369        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51370        assert_eq_m512i(r, e);
51371    }
51372
51373    #[simd_test(enable = "avx512f")]
51374    fn test_mm512_maskz_cvttps_epu32() {
51375        let a = _mm512_setr_ps(
51376            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51377        );
51378        let r = _mm512_maskz_cvttps_epu32(0, a);
51379        assert_eq_m512i(r, _mm512_setzero_si512());
51380        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
51381        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51382        assert_eq_m512i(r, e);
51383    }
51384
51385    #[simd_test(enable = "avx512f,avx512vl")]
51386    fn test_mm256_cvttps_epu32() {
51387        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51388        let r = _mm256_cvttps_epu32(a);
51389        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51390        assert_eq_m256i(r, e);
51391    }
51392
51393    #[simd_test(enable = "avx512f,avx512vl")]
51394    fn test_mm256_mask_cvttps_epu32() {
51395        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51396        let src = _mm256_set1_epi32(0);
51397        let r = _mm256_mask_cvttps_epu32(src, 0, a);
51398        assert_eq_m256i(r, src);
51399        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
51400        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51401        assert_eq_m256i(r, e);
51402    }
51403
51404    #[simd_test(enable = "avx512f,avx512vl")]
51405    fn test_mm256_maskz_cvttps_epu32() {
51406        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51407        let r = _mm256_maskz_cvttps_epu32(0, a);
51408        assert_eq_m256i(r, _mm256_setzero_si256());
51409        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
51410        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51411        assert_eq_m256i(r, e);
51412    }
51413
51414    #[simd_test(enable = "avx512f,avx512vl")]
51415    fn test_mm_cvttps_epu32() {
51416        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51417        let r = _mm_cvttps_epu32(a);
51418        let e = _mm_set_epi32(12, 13, 14, 15);
51419        assert_eq_m128i(r, e);
51420    }
51421
51422    #[simd_test(enable = "avx512f,avx512vl")]
51423    fn test_mm_mask_cvttps_epu32() {
51424        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51425        let src = _mm_set1_epi32(0);
51426        let r = _mm_mask_cvttps_epu32(src, 0, a);
51427        assert_eq_m128i(r, src);
51428        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
51429        let e = _mm_set_epi32(12, 13, 14, 15);
51430        assert_eq_m128i(r, e);
51431    }
51432
51433    #[simd_test(enable = "avx512f,avx512vl")]
51434    fn test_mm_maskz_cvttps_epu32() {
51435        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51436        let r = _mm_maskz_cvttps_epu32(0, a);
51437        assert_eq_m128i(r, _mm_setzero_si128());
51438        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
51439        let e = _mm_set_epi32(12, 13, 14, 15);
51440        assert_eq_m128i(r, e);
51441    }
51442
51443    #[simd_test(enable = "avx512f")]
51444    fn test_mm512_i32gather_ps() {
51445        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
51446        // A multiplier of 4 is word-addressing
51447        #[rustfmt::skip]
51448        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51449                                      120, 128, 136, 144, 152, 160, 168, 176);
51450        let r = unsafe { _mm512_i32gather_ps::<4>(index, arr.as_ptr()) };
51451        #[rustfmt::skip]
51452        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
51453                                         120., 128., 136., 144., 152., 160., 168., 176.));
51454    }
51455
51456    #[simd_test(enable = "avx512f")]
51457    fn test_mm512_mask_i32gather_ps() {
51458        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
51459        let src = _mm512_set1_ps(2.);
51460        let mask = 0b10101010_10101010;
51461        #[rustfmt::skip]
51462        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51463                                      120, 128, 136, 144, 152, 160, 168, 176);
51464        // A multiplier of 4 is word-addressing
51465        let r = unsafe { _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr()) };
51466        #[rustfmt::skip]
51467        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
51468                                         2., 128., 2., 144., 2., 160., 2., 176.));
51469    }
51470
51471    #[simd_test(enable = "avx512f")]
51472    fn test_mm512_i32gather_epi32() {
51473        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
51474        // A multiplier of 4 is word-addressing
51475        #[rustfmt::skip]
51476        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51477                                      120, 128, 136, 144, 152, 160, 168, 176);
51478        let r = unsafe { _mm512_i32gather_epi32::<4>(index, arr.as_ptr()) };
51479        #[rustfmt::skip]
51480        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51481                                             120, 128, 136, 144, 152, 160, 168, 176));
51482    }
51483
51484    #[simd_test(enable = "avx512f")]
51485    fn test_mm512_mask_i32gather_epi32() {
51486        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
51487        let src = _mm512_set1_epi32(2);
51488        let mask = 0b10101010_10101010;
51489        let index = _mm512_setr_epi32(
51490            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
51491        );
51492        // A multiplier of 4 is word-addressing
51493        let r = unsafe { _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr()) };
51494        assert_eq_m512i(
51495            r,
51496            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
51497        );
51498    }
51499
51500    #[simd_test(enable = "avx512f")]
51501    fn test_mm512_i32scatter_ps() {
51502        let mut arr = [0f32; 256];
51503        #[rustfmt::skip]
51504        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51505                                      128, 144, 160, 176, 192, 208, 224, 240);
51506        let src = _mm512_setr_ps(
51507            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
51508        );
51509        // A multiplier of 4 is word-addressing
51510        unsafe {
51511            _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
51512        }
51513        let mut expected = [0f32; 256];
51514        for i in 0..16 {
51515            expected[i * 16] = (i + 1) as f32;
51516        }
51517        assert_eq!(&arr[..], &expected[..],);
51518    }
51519
51520    #[simd_test(enable = "avx512f")]
51521    fn test_mm512_mask_i32scatter_ps() {
51522        let mut arr = [0f32; 256];
51523        let mask = 0b10101010_10101010;
51524        #[rustfmt::skip]
51525        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51526                                      128, 144, 160, 176, 192, 208, 224, 240);
51527        let src = _mm512_setr_ps(
51528            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
51529        );
51530        // A multiplier of 4 is word-addressing
51531        unsafe {
51532            _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
51533        }
51534        let mut expected = [0f32; 256];
51535        for i in 0..8 {
51536            expected[i * 32 + 16] = 2. * (i + 1) as f32;
51537        }
51538        assert_eq!(&arr[..], &expected[..],);
51539    }
51540
51541    #[simd_test(enable = "avx512f")]
51542    fn test_mm512_i32scatter_epi32() {
51543        let mut arr = [0i32; 256];
51544        #[rustfmt::skip]
51545
51546        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51547                                      128, 144, 160, 176, 192, 208, 224, 240);
51548        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51549        // A multiplier of 4 is word-addressing
51550        unsafe {
51551            _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
51552        }
51553        let mut expected = [0i32; 256];
51554        for i in 0..16 {
51555            expected[i * 16] = (i + 1) as i32;
51556        }
51557        assert_eq!(&arr[..], &expected[..],);
51558    }
51559
51560    #[simd_test(enable = "avx512f")]
51561    fn test_mm512_mask_i32scatter_epi32() {
51562        let mut arr = [0i32; 256];
51563        let mask = 0b10101010_10101010;
51564        #[rustfmt::skip]
51565        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51566                                      128, 144, 160, 176, 192, 208, 224, 240);
51567        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51568        // A multiplier of 4 is word-addressing
51569        unsafe {
51570            _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
51571        }
51572        let mut expected = [0i32; 256];
51573        for i in 0..8 {
51574            expected[i * 32 + 16] = 2 * (i + 1) as i32;
51575        }
51576        assert_eq!(&arr[..], &expected[..],);
51577    }
51578
51579    #[simd_test(enable = "avx512f")]
51580    fn test_mm512_cmplt_ps_mask() {
51581        #[rustfmt::skip]
51582        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51583                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51584        let b = _mm512_set1_ps(-1.);
51585        let m = _mm512_cmplt_ps_mask(a, b);
51586        assert_eq!(m, 0b00000101_00000101);
51587    }
51588
51589    #[simd_test(enable = "avx512f")]
51590    fn test_mm512_mask_cmplt_ps_mask() {
51591        #[rustfmt::skip]
51592        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51593                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51594        let b = _mm512_set1_ps(-1.);
51595        let mask = 0b01100110_01100110;
51596        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
51597        assert_eq!(r, 0b00000100_00000100);
51598    }
51599
51600    #[simd_test(enable = "avx512f")]
51601    fn test_mm512_cmpnlt_ps_mask() {
51602        #[rustfmt::skip]
51603        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51604                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51605        let b = _mm512_set1_ps(-1.);
51606        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
51607    }
51608
51609    #[simd_test(enable = "avx512f")]
51610    fn test_mm512_mask_cmpnlt_ps_mask() {
51611        #[rustfmt::skip]
51612        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51613                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51614        let b = _mm512_set1_ps(-1.);
51615        let mask = 0b01111010_01111010;
51616        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
51617    }
51618
51619    #[simd_test(enable = "avx512f")]
51620    fn test_mm512_cmpnle_ps_mask() {
51621        #[rustfmt::skip]
51622        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51623                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51624        let b = _mm512_set1_ps(-1.);
51625        let m = _mm512_cmpnle_ps_mask(b, a);
51626        assert_eq!(m, 0b00001101_00001101);
51627    }
51628
51629    #[simd_test(enable = "avx512f")]
51630    fn test_mm512_mask_cmpnle_ps_mask() {
51631        #[rustfmt::skip]
51632        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51633                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51634        let b = _mm512_set1_ps(-1.);
51635        let mask = 0b01100110_01100110;
51636        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
51637        assert_eq!(r, 0b00000100_00000100);
51638    }
51639
51640    #[simd_test(enable = "avx512f")]
51641    fn test_mm512_cmple_ps_mask() {
51642        #[rustfmt::skip]
51643        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51644                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51645        let b = _mm512_set1_ps(-1.);
51646        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
51647    }
51648
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmple_ps_mask() {
        // Masked a <= -1: expected result is the unmasked 0b00100101 result
        // intersected with the write-mask.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
    }
51658
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpeq_ps_mask() {
        // Equality compare; NaN == NaN is unordered (false), so that lane's
        // bit is clear even though both inputs hold NaN there.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let m = _mm512_cmpeq_ps_mask(b, a);
        assert_eq!(m, 0b11001101_11001101);
    }
51670
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpeq_ps_mask() {
        // Masked equality: the unmasked 0b11001101 result intersected with
        // the write-mask 0b01111010.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
        assert_eq!(r, 0b01001000_01001000);
    }
51683
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpneq_ps_mask() {
        // Not-equal compare; the NaN lane is unordered and thus counts as
        // not-equal (its bit is set), alongside the genuinely differing lanes.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let m = _mm512_cmpneq_ps_mask(b, a);
        assert_eq!(m, 0b00110010_00110010);
    }
51695
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpneq_ps_mask() {
        // Masked not-equal: here the unmasked result 0b00110010 is already a
        // subset of the write-mask, so it passes through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }
51708
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmp_ps_mask() {
        // Generic compare with an explicit predicate (_CMP_LT_OQ, ordered
        // quiet less-than); only -100. and f32::MIN are < -1.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51718
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmp_ps_mask() {
        // Masked _CMP_LT_OQ: unmasked 0b00000101 intersected with 0b01100110
        // leaves only bit 2 per half.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
51729
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cmp_ps_mask() {
        // 256-bit (AVX512VL) variant of the _CMP_LT_OQ test above.
        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm256_set1_ps(-1.);
        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00000101);
    }
51737
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cmp_ps_mask() {
        // Masked 256-bit variant: 0b00000101 & 0b01100110 == 0b00000100.
        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm256_set1_ps(-1.);
        let mask = 0b01100110;
        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }
51746
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cmp_ps_mask() {
        // 128-bit variant: lanes holding 0. and -1. are < 1.
        let a = _mm_set_ps(0., 1., -1., 13.);
        let b = _mm_set1_ps(1.);
        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00001010);
    }
51754
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cmp_ps_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_ps(0., 1., -1., 13.);
        let b = _mm_set1_ps(1.);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }
51763
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmp_round_ps_mask() {
        // Rounding-control variant with _MM_FROUND_CUR_DIRECTION; expected
        // mask matches the plain _mm512_cmp_ps_mask test.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51773
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmp_round_ps_mask() {
        // Masked rounding-control variant; expected mask matches the plain
        // masked _CMP_LT_OQ test.
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
51784
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpord_ps_mask() {
        // Ordered compare: a bit is set only where NEITHER operand lane is
        // NaN; inputs are laid out so just two lanes per half are NaN-free.
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        let m = _mm512_cmpord_ps_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51796
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpord_ps_mask() {
        // Masked ordered compare: unmasked 0b00000101 intersected with
        // 0b11000011 leaves only bit 0 per half.
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        let mask = 0b11000011_11000011;
        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00000001_00000001);
    }
51809
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpunord_ps_mask() {
        // Unordered compare: the exact complement of the cmpord test above —
        // a bit is set where EITHER operand lane is NaN.
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        let m = _mm512_cmpunord_ps_mask(a, b);

        assert_eq!(m, 0b11111010_11111010);
    }
51822
51823    #[simd_test(enable = "avx512f")]
51824    fn test_mm512_mask_cmpunord_ps_mask() {
51825        #[rustfmt::skip]
51826        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51827                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51828        #[rustfmt::skip]
51829        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51830                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
51831        let mask = 0b00001111_00001111;
51832        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
51833        assert_eq!(m, 0b000001010_00001010);
51834    }
51835
    #[simd_test(enable = "avx512f")]
    fn test_mm_cmp_ss_mask() {
        // Scalar single-precision compare: only element 0 participates
        // (2. >= 1.), so the mask is exactly 1.
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
        assert_eq!(m, 1);
    }
51843
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cmp_ss_mask() {
        // Scalar compare under a mask: with mask bit 0 clear the result is 0
        // even though the compare succeeds; with bit 0 set it is 1.
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
        assert_eq!(m, 1);
    }
51853
    #[simd_test(enable = "avx512f")]
    fn test_mm_cmp_round_ss_mask() {
        // Rounding-control variant of the scalar ss compare above.
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 1);
    }
51861
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cmp_round_ss_mask() {
        // Masked rounding-control scalar compare: bit 0 of the mask gates the
        // single result bit.
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
        assert_eq!(m, 1);
    }
51871
    #[simd_test(enable = "avx512f")]
    fn test_mm_cmp_sd_mask() {
        // Scalar double-precision compare: element 0 only (2. >= 1.).
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
        assert_eq!(m, 1);
    }
51879
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cmp_sd_mask() {
        // Masked scalar sd compare: bit 0 of the mask gates the result bit.
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
        assert_eq!(m, 1);
    }
51889
    #[simd_test(enable = "avx512f")]
    fn test_mm_cmp_round_sd_mask() {
        // Rounding-control variant of the scalar sd compare above.
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 1);
    }
51897
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cmp_round_sd_mask() {
        // Masked rounding-control scalar sd compare: mask bit 0 gates the
        // single result bit.
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
        assert_eq!(m, 1);
    }
51907
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmplt_epu32_mask() {
        // Unsigned less-than vs -1 (reinterpreted as u32::MAX): every lane
        // except -1 and u32::MAX is below it. Also a const-eval test.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epu32_mask(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }
51917
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmplt_epu32_mask() {
        // Masked unsigned less-than: unmasked 0b11001111 intersected with
        // the write-mask 0b01111010.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }
51928
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmplt_epu32_mask() {
        // 256-bit unsigned less-than vs 1: only the 0 lane qualifies.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmplt_epu32_mask(a, b);
        assert_eq!(r, 0b10000000);
    }
51936
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmplt_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b10000000);
    }
51945
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmplt_epu32_mask() {
        // 128-bit unsigned less-than vs 1: only the 0 lane qualifies.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmplt_epu32_mask(a, b);
        assert_eq!(r, 0b00001000);
    }
51953
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmplt_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00001000);
    }
51962
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpgt_epu32_mask() {
        // b > a unsigned with b = u32::MAX: true for every lane of a that is
        // not itself u32::MAX — mirrors the cmplt test with operands swapped.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }
51972
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpgt_epu32_mask() {
        // Masked unsigned greater-than: unmasked 0b11001111 intersected with
        // the write-mask 0b01111010.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }
51983
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpgt_epu32_mask() {
        // 256-bit unsigned a > 1: every lane except 0 and 1 qualifies.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmpgt_epu32_mask(a, b);
        assert_eq!(r, 0b00111111);
    }
51991
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpgt_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00111111);
    }
52000
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpgt_epu32_mask() {
        // 128-bit unsigned a > 1: the 2 and u32::MAX lanes qualify.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmpgt_epu32_mask(a, b);
        assert_eq!(r, 0b00000011);
    }
52008
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpgt_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00000011);
    }
52017
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmple_epu32_mask() {
        // Cross-checks le against gt on identical inputs: for total integer
        // ordering, (a <= b) must be the bitwise complement of (a > b).
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epu32_mask(a, b),
            !_mm512_cmpgt_epu32_mask(a, b)
        )
    }
52029
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmple_epu32_mask() {
        // Unsigned a <= u32::MAX is true for every lane, so the masked result
        // is exactly the write-mask.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmple_epu32_mask(mask, a, b),
            0b01111010_01111010
        );
    }
52042
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmple_epu32_mask() {
        // 256-bit unsigned a <= 1: only the 0 and 1 lanes qualify.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmple_epu32_mask(a, b);
        assert_eq!(r, 0b11000000)
    }
52050
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmple_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
        assert_eq!(r, 0b11000000)
    }
52059
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmple_epu32_mask() {
        // 128-bit unsigned a <= 1: only the 0 and 1 lanes qualify.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmple_epu32_mask(a, b);
        assert_eq!(r, 0b00001100)
    }
52067
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmple_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00001100)
    }
52076
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpge_epu32_mask() {
        // Cross-checks ge against lt on identical inputs: for total integer
        // ordering, (a >= b) must be the bitwise complement of (a < b).
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epu32_mask(a, b),
            !_mm512_cmplt_epu32_mask(a, b)
        )
    }
52088
52089    #[simd_test(enable = "avx512f")]
52090    const fn test_mm512_mask_cmpge_epu32_mask() {
52091        #[rustfmt::skip]
52092        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52093                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52094        let b = _mm512_set1_epi32(-1);
52095        let mask = 0b01111010_01111010;
52096        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000);
52097    }
52098
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpge_epu32_mask() {
        // 256-bit unsigned a >= 1: every lane except 0 qualifies.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmpge_epu32_mask(a, b);
        assert_eq!(r, 0b01111111)
    }
52106
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpge_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01111111)
    }
52115
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpge_epu32_mask() {
        // 128-bit unsigned a >= 1: every lane except 0 qualifies.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmpge_epu32_mask(a, b);
        assert_eq!(r, 0b00000111)
    }
52123
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpge_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00000111)
    }
52132
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpeq_epu32_mask() {
        // Unsigned equality: -1 and u32::MAX as i32 are the same bit pattern,
        // so those lanes differ from b's 13/42 and clear their bits.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }
52144
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpeq_epu32_mask() {
        // Masked unsigned equality: unmasked 0b11001111 intersected with the
        // write-mask 0b01111010.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }
52157
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpeq_epu32_mask() {
        // 256-bit unsigned equality: only the -1/13 and u32::MAX/42 pairs differ.
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }
52165
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu32_mask() {
        // Masked 256-bit unsigned equality: 0b11001111 & 0b01111010.
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }
52174
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpeq_epu32_mask() {
        // 128-bit unsigned equality: only the 0 and 1 lanes match.
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }
52182
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpeq_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }
52191
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpneq_epu32_mask() {
        // Cross-checks neq against eq on identical inputs: integer not-equal
        // must be the bitwise complement of equal.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epu32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
    }
52203
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpneq_epu32_mask() {
        // Masked unsigned not-equal; note a swaps -100/100 relative to b so
        // those lanes differ too, then the write-mask is applied.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010);
    }
52216
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpneq_epu32_mask() {
        // 256-bit unsigned not-equal: only the -1/13 and u32::MAX/42 pairs differ.
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
        let r = _mm256_cmpneq_epu32_mask(b, a);
        assert_eq!(r, 0b00110000);
    }
52224
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpneq_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110000);
    }
52233
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpneq_epu32_mask() {
        // 128-bit unsigned not-equal: only the -1/13 and u32::MAX/42 pairs differ.
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epu32_mask(b, a);
        assert_eq!(r, 0b00000011);
    }
52241
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpneq_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00000011);
    }
52250
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmp_epu32_mask() {
        // Generic unsigned compare with an explicit _MM_CMPINT_LT predicate;
        // expected mask matches the dedicated cmplt_epu32 test.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }
52260
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmp_epu32_mask() {
        // Masked generic _MM_CMPINT_LT: unmasked 0b11001111 intersected with
        // the write-mask 0b01111010.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }
52271
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmp_epu32_mask() {
        // 256-bit generic unsigned less-than vs u32::MAX.
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11001111);
    }
52279
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmp_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b11001111);
    }
52288
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmp_epu32_mask() {
        // 128-bit generic unsigned less-than vs 1: only the 0 lane qualifies.
        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001000);
    }
52296
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmp_epu32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001000);
    }
52305
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmplt_epi32_mask() {
        // Signed less-than vs -1: only -100 and i32::MIN are below it (the -1
        // and u32::MAX-pattern lanes are equal to b, not less).
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epi32_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
52315
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmplt_epi32_mask() {
        // Masked signed less-than: unmasked 0b00000101 intersected with the
        // write-mask 0b01100110.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
52326
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmplt_epi32_mask() {
        // 256-bit signed a < -1: only -100 and i32::MIN qualify.
        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmplt_epi32_mask(a, b);
        assert_eq!(r, 0b00000101);
    }
52334
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmplt_epi32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000101);
    }
52343
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmplt_epi32_mask() {
        // 128-bit signed a < -1: only -100 and i32::MIN qualify.
        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmplt_epi32_mask(a, b);
        assert_eq!(r, 0b00000101);
    }
52351
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmplt_epi32_mask() {
        // All-ones write-mask: masked result equals the unmasked result.
        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000101);
    }
52360
    // Note the swapped operands: this checks b > a, which is true exactly
    // where a < -1 (lanes i32::MIN and -100).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epi32_mask(b, a);
        assert_eq!(m, 0b00000101_00000101);
    }

    // Writemask variant of the swapped-operand compare above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }

    // Here the operands are NOT swapped: a > -1 for all lanes except
    // -1, i32::MIN and -100.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b11011010);
    }

    // All-ones writemask: result equals the unmasked compare.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11011010);
    }

    // 128-bit: every lane but -1 is > -1.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b00001101);
    }

    // 128-bit writemask form with a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001101);
    }
52415
    // cmple is validated structurally: (a <= b) must be the bitwise complement
    // of (a > b) for the same operands.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmple_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epi32_mask(a, b),
            !_mm512_cmpgt_epi32_mask(a, b)
        )
    }
52427
52428    #[simd_test(enable = "avx512f")]
52429    const fn test_mm512_mask_cmple_epi32_mask() {
52430        #[rustfmt::skip]
52431        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52432                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52433        let b = _mm512_set1_epi32(-1);
52434        let mask = 0b01111010_01111010;
52435        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000);
52436    }
52437
    // 256-bit cmple: lanes -1, i32::MIN and -100 are <= -1.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00100101)
    }

    // All-ones writemask: result equals the unmasked compare.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00100101)
    }

    // 128-bit cmple: only the -1 lane (bit 1) is <= -1.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00000010)
    }

    // 128-bit writemask form with a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000010)
    }
52471
    // cmpge validated structurally: (a >= b) must be the complement of (a < b).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epi32_mask(a, b),
            !_mm512_cmplt_epi32_mask(a, b)
        )
    }

    // Full compare mask is 0b11111010 per byte (only i32::MIN and -100 are
    // < -1); ANDing with the writemask 0b01111010_01111010 returns it unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmpge_epi32_mask(mask, a, b),
            0b01111010_01111010
        );
    }

    // 256-bit cmpge: every lane but i32::MIN and -100 is >= -1.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b11111010)
    }

    // All-ones writemask: result equals the unmasked compare.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11111010)
    }

    // 128-bit: all four lanes (0, 1, -1, and u32::MAX as i32 == -1) are >= -1.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b00001111)
    }

    // 128-bit writemask form with a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001111)
    }
52530
    // a and b differ only in two lanes per 8-lane group (-1 vs 13, 13 vs 42),
    // so equality holds everywhere except bits 4 and 5 of each byte.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    // Writemask: 0b11001111 & 0b01111010 = 0b01001010 per byte.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    // 256-bit form of the same lane pattern.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }

    // Writemask: 0b11001111 & 0b01111010 = 0b01001010.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }

    // 128-bit: only the two high lanes (0 and 1) match.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }

    // 128-bit writemask form with a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }
52589
    // cmpneq validated structurally against the complement of cmpeq.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
    }

    // Here `a` additionally swaps its last two lanes (-100, 100 vs b's
    // 100, -100), so four lanes per byte differ: neq = 0b00110011, and
    // ANDing with 0b01111010 gives 0b00110010.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }

    // 256-bit structural check against !cmpeq.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
    }

    // Full mask, swapped trailing lanes as in the 512-bit masked test.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110011)
    }

    // 128-bit: the two low lanes differ.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epi32_mask(b, a);
        assert_eq!(r, 0b00000011)
    }

    // 128-bit writemask form with a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000011)
    }
52648
    // Generic-predicate compare with _MM_CMPINT_LT — expected to match the
    // dedicated cmplt intrinsic for the same operands.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    // Writemask variant of the generic-predicate compare.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    // 256-bit generic-predicate LT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101);
    }

    // 256-bit writemask form.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b01100110;
        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }

    // 128-bit: note b is +1 here, so 0 and -1 are < b.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001010);
    }

    // 128-bit writemask form with a full mask.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }
52703
    // Constructor tests: `set*` takes arguments highest-lane-first, `setr*`
    // ("reversed") lowest-lane-first, and `set1*` splats a single value —
    // each test cross-checks one constructor against another.

    // set1_epi8 splat equals set_epi8 with 64 identical arguments.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi8() {
        let r = _mm512_set1_epi8(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi8(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2,
            ),
        )
    }

    // set1_epi16 splat equals set_epi16 with 32 identical arguments.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi16() {
        let r = _mm512_set1_epi16(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi16(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2,
            ),
        )
    }

    // setr with ascending args equals set with the same args reversed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi32() {
        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    // Mirror of the previous check, set vs setr.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_epi32() {
        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    // set_epi8 with 64 identical arguments equals the set1_epi8 splat.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi8() {
        let r = _mm512_set_epi8(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi8(2));
    }

    // set_epi16 with 32 identical arguments equals the set1_epi16 splat.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi16() {
        let r = _mm512_set_epi16(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi16(2));
    }

    // set_epi32 with 16 identical arguments equals the set1_epi32 splat.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi32() {
        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, _mm512_set1_epi32(2));
    }

    // Zero vector equals an all-zero splat.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_si512() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
    }

    // Alias of setzero_si512 — same expectation.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_epi32() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
    }

    // Float setr vs set with reversed arguments.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_ps() {
        let r = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_set_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    // Mirror of the previous check.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_ps() {
        let r = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_setr_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    // set1_ps splat equals set_ps with 16 identical arguments.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_ps() {
        #[rustfmt::skip]
        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
                                     2., 2., 2., 2., 2., 2., 2., 2.);
        assert_eq_m512(expected, _mm512_set1_ps(2.));
    }

    // set4 repeats its 4 arguments across all four 128-bit lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
    }

    // Float variant of set4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
    }

    // setr4 takes the same 4 values in reversed (lowest-lane-first) order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
    }

    // Float variant of setr4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
    }

    // Float zero vector equals an all-zero splat.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_ps() {
        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
    }

    // Alias of setzero_ps — same expectation.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero() {
        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
    }
52853
    // Unaligned load: source is a plain slice (no alignment guarantee);
    // `black_box` keeps the pointer opaque so the load is not const-folded.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_pd() {
        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_pd(black_box(p)) };
        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
        assert_eq_m512d(r, e);
    }

    // Unaligned store: round-trips a splat through a stack __m512d buffer.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_pd() {
        let a = _mm512_set1_pd(9.);
        let mut r = _mm512_undefined_pd();
        unsafe {
            _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
        }
        assert_eq_m512d(r, a);
    }

    // f32 variant of the unaligned load test.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_ps() {
        let a = &[
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        ];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_ps(black_box(p)) };
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    // f32 variant of the unaligned store round-trip.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        unsafe {
            _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
        }
        assert_eq_m512(r, a);
    }
52895
    // Masked unaligned load: lanes whose mask bit is set come from memory,
    // the rest are copied from `src` (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Zeroing variant: unselected lanes become 0 instead of src.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Aligned masked load: the repr(align(64)) wrapper provides the 64-byte
    // alignment _mm512_mask_load_epi32 requires.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let src = _mm512_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Aligned zeroing load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_load_epi32(m, black_box(p)) };
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Masked unaligned store: only lanes with a set mask bit are written;
    // the rest keep the buffer's prior value (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_epi32() {
        let mut r = [42_i32; 16];
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(unsafe { _mm512_loadu_epi32(r.as_ptr()) }, e);
    }

    // Aligned masked store.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16],
        }
        let mut r = Align { data: [42; 16] };
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(unsafe { _mm512_load_epi32(r.data.as_ptr()) }, e);
    }
52977
    // 64-bit element variants of the masked load/store tests; the mask is
    // 8 bits wide (one per i64 lane).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_loadu_epi64(src, m, black_box(p)) };
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Zeroing variant: unselected lanes become 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_loadu_epi64(m, black_box(p)) };
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Aligned masked load (repr(align(64)) supplies the required alignment).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let src = _mm512_set1_epi64(42);
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_load_epi64(src, m, black_box(p)) };
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Aligned zeroing load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_load_epi64(m, black_box(p)) };
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Masked unaligned store: unselected lanes keep the prior 42s.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_epi64() {
        let mut r = [42_i64; 8];
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(unsafe { _mm512_loadu_epi64(r.as_ptr()) }, e);
    }

    // Aligned masked store.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8],
        }
        let mut r = Align { data: [42; 8] };
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        let p = r.data.as_mut_ptr();
        unsafe {
            _mm512_mask_store_epi64(p, m, a);
        }
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(unsafe { _mm512_load_epi64(r.data.as_ptr()) }, e);
    }
53060
    // f32 variants of the masked load/store tests; 16 lanes, 16-bit mask.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_ps() {
        let src = _mm512_set1_ps(42.0);
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_ps(src, m, black_box(p)) };
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    // Zeroing variant: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_ps() {
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_ps(m, black_box(p)) };
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    // Aligned masked load (repr(align(64)) supplies the required alignment).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let src = _mm512_set1_ps(42.0);
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_load_ps(src, m, black_box(p)) };
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    // Aligned zeroing load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_load_ps(m, black_box(p)) };
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    // Masked unaligned store: unselected lanes keep the prior 42.0s.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_ps() {
        let mut r = [42_f32; 16];
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(unsafe { _mm512_loadu_ps(r.as_ptr()) }, e);
    }

    // Aligned masked store.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16],
        }
        let mut r = Align { data: [42.0; 16] };
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(unsafe { _mm512_load_ps(r.data.as_ptr()) }, e);
    }
53174
53175    #[simd_test(enable = "avx512f")]
53176    const fn test_mm512_mask_loadu_pd() {
53177        let src = _mm512_set1_pd(42.0);
53178        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53179        let p = a.as_ptr();
53180        let m = 0b11001010;
53181        let r = unsafe { _mm512_mask_loadu_pd(src, m, black_box(p)) };
53182        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53183        assert_eq_m512d(r, e);
53184    }
53185
53186    #[simd_test(enable = "avx512f")]
53187    const fn test_mm512_maskz_loadu_pd() {
53188        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53189        let p = a.as_ptr();
53190        let m = 0b11001010;
53191        let r = unsafe { _mm512_maskz_loadu_pd(m, black_box(p)) };
53192        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53193        assert_eq_m512d(r, e);
53194    }
53195
53196    #[simd_test(enable = "avx512f")]
53197    const fn test_mm512_mask_load_pd() {
53198        #[repr(align(64))]
53199        struct Align {
53200            data: [f64; 8], // 64 bytes
53201        }
53202        let src = _mm512_set1_pd(42.0);
53203        let a = Align {
53204            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53205        };
53206        let p = a.data.as_ptr();
53207        let m = 0b11001010;
53208        let r = unsafe { _mm512_mask_load_pd(src, m, black_box(p)) };
53209        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53210        assert_eq_m512d(r, e);
53211    }
53212
53213    #[simd_test(enable = "avx512f")]
53214    const fn test_mm512_maskz_load_pd() {
53215        #[repr(align(64))]
53216        struct Align {
53217            data: [f64; 8], // 64 bytes
53218        }
53219        let a = Align {
53220            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53221        };
53222        let p = a.data.as_ptr();
53223        let m = 0b11001010;
53224        let r = unsafe { _mm512_maskz_load_pd(m, black_box(p)) };
53225        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53226        assert_eq_m512d(r, e);
53227    }
53228
53229    #[simd_test(enable = "avx512f")]
53230    const fn test_mm512_mask_storeu_pd() {
53231        let mut r = [42_f64; 8];
53232        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53233        let m = 0b11001010;
53234        unsafe {
53235            _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
53236        }
53237        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53238        assert_eq_m512d(unsafe { _mm512_loadu_pd(r.as_ptr()) }, e);
53239    }
53240
53241    #[simd_test(enable = "avx512f")]
53242    const fn test_mm512_mask_store_pd() {
53243        #[repr(align(64))]
53244        struct Align {
53245            data: [f64; 8],
53246        }
53247        let mut r = Align { data: [42.0; 8] };
53248        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53249        let m = 0b11001010;
53250        unsafe {
53251            _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
53252        }
53253        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53254        assert_eq_m512d(unsafe { _mm512_load_pd(r.data.as_ptr()) }, e);
53255    }
53256
53257    #[simd_test(enable = "avx512f,avx512vl")]
53258    const fn test_mm256_mask_loadu_epi32() {
53259        let src = _mm256_set1_epi32(42);
53260        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
53261        let p = a.as_ptr();
53262        let m = 0b11001010;
53263        let r = unsafe { _mm256_mask_loadu_epi32(src, m, black_box(p)) };
53264        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53265        assert_eq_m256i(r, e);
53266    }
53267
53268    #[simd_test(enable = "avx512f,avx512vl")]
53269    const fn test_mm256_maskz_loadu_epi32() {
53270        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
53271        let p = a.as_ptr();
53272        let m = 0b11001010;
53273        let r = unsafe { _mm256_maskz_loadu_epi32(m, black_box(p)) };
53274        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
53275        assert_eq_m256i(r, e);
53276    }
53277
53278    #[simd_test(enable = "avx512f,avx512vl")]
53279    const fn test_mm256_mask_load_epi32() {
53280        #[repr(align(32))]
53281        struct Align {
53282            data: [i32; 8], // 32 bytes
53283        }
53284        let src = _mm256_set1_epi32(42);
53285        let a = Align {
53286            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
53287        };
53288        let p = a.data.as_ptr();
53289        let m = 0b11001010;
53290        let r = unsafe { _mm256_mask_load_epi32(src, m, black_box(p)) };
53291        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53292        assert_eq_m256i(r, e);
53293    }
53294
53295    #[simd_test(enable = "avx512f,avx512vl")]
53296    const fn test_mm256_maskz_load_epi32() {
53297        #[repr(align(32))]
53298        struct Align {
53299            data: [i32; 8], // 32 bytes
53300        }
53301        let a = Align {
53302            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
53303        };
53304        let p = a.data.as_ptr();
53305        let m = 0b11001010;
53306        let r = unsafe { _mm256_maskz_load_epi32(m, black_box(p)) };
53307        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
53308        assert_eq_m256i(r, e);
53309    }
53310
53311    #[simd_test(enable = "avx512f,avx512vl")]
53312    const fn test_mm256_mask_storeu_epi32() {
53313        let mut r = [42_i32; 8];
53314        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53315        let m = 0b11001010;
53316        unsafe {
53317            _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
53318        }
53319        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53320        assert_eq_m256i(unsafe { _mm256_loadu_epi32(r.as_ptr()) }, e);
53321    }
53322
53323    #[simd_test(enable = "avx512f,avx512vl")]
53324    const fn test_mm256_mask_store_epi32() {
53325        #[repr(align(64))]
53326        struct Align {
53327            data: [i32; 8],
53328        }
53329        let mut r = Align { data: [42; 8] };
53330        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53331        let m = 0b11001010;
53332        unsafe {
53333            _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
53334        }
53335        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53336        assert_eq_m256i(unsafe { _mm256_load_epi32(r.data.as_ptr()) }, e);
53337    }
53338
53339    #[simd_test(enable = "avx512f,avx512vl")]
53340    const fn test_mm256_mask_loadu_epi64() {
53341        let src = _mm256_set1_epi64x(42);
53342        let a = &[1_i64, 2, 3, 4];
53343        let p = a.as_ptr();
53344        let m = 0b1010;
53345        let r = unsafe { _mm256_mask_loadu_epi64(src, m, black_box(p)) };
53346        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53347        assert_eq_m256i(r, e);
53348    }
53349
53350    #[simd_test(enable = "avx512f,avx512vl")]
53351    const fn test_mm256_maskz_loadu_epi64() {
53352        let a = &[1_i64, 2, 3, 4];
53353        let p = a.as_ptr();
53354        let m = 0b1010;
53355        let r = unsafe { _mm256_maskz_loadu_epi64(m, black_box(p)) };
53356        let e = _mm256_setr_epi64x(0, 2, 0, 4);
53357        assert_eq_m256i(r, e);
53358    }
53359
53360    #[simd_test(enable = "avx512f,avx512vl")]
53361    const fn test_mm256_mask_load_epi64() {
53362        #[repr(align(32))]
53363        struct Align {
53364            data: [i64; 4], // 32 bytes
53365        }
53366        let src = _mm256_set1_epi64x(42);
53367        let a = Align {
53368            data: [1_i64, 2, 3, 4],
53369        };
53370        let p = a.data.as_ptr();
53371        let m = 0b1010;
53372        let r = unsafe { _mm256_mask_load_epi64(src, m, black_box(p)) };
53373        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53374        assert_eq_m256i(r, e);
53375    }
53376
53377    #[simd_test(enable = "avx512f,avx512vl")]
53378    const fn test_mm256_maskz_load_epi64() {
53379        #[repr(align(32))]
53380        struct Align {
53381            data: [i64; 4], // 32 bytes
53382        }
53383        let a = Align {
53384            data: [1_i64, 2, 3, 4],
53385        };
53386        let p = a.data.as_ptr();
53387        let m = 0b1010;
53388        let r = unsafe { _mm256_maskz_load_epi64(m, black_box(p)) };
53389        let e = _mm256_setr_epi64x(0, 2, 0, 4);
53390        assert_eq_m256i(r, e);
53391    }
53392
53393    #[simd_test(enable = "avx512f,avx512vl")]
53394    const fn test_mm256_mask_storeu_epi64() {
53395        let mut r = [42_i64; 4];
53396        let a = _mm256_setr_epi64x(1, 2, 3, 4);
53397        let m = 0b1010;
53398        unsafe {
53399            _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
53400        }
53401        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53402        assert_eq_m256i(unsafe { _mm256_loadu_epi64(r.as_ptr()) }, e);
53403    }
53404
53405    #[simd_test(enable = "avx512f,avx512vl")]
53406    const fn test_mm256_mask_store_epi64() {
53407        #[repr(align(32))]
53408        struct Align {
53409            data: [i64; 4],
53410        }
53411        let mut r = Align { data: [42; 4] };
53412        let a = _mm256_setr_epi64x(1, 2, 3, 4);
53413        let m = 0b1010;
53414        unsafe {
53415            _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
53416        }
53417        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53418        assert_eq_m256i(unsafe { _mm256_load_epi64(r.data.as_ptr()) }, e);
53419    }
53420
53421    #[simd_test(enable = "avx512f,avx512vl")]
53422    const fn test_mm256_mask_loadu_ps() {
53423        let src = _mm256_set1_ps(42.0);
53424        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53425        let p = a.as_ptr();
53426        let m = 0b11001010;
53427        let r = unsafe { _mm256_mask_loadu_ps(src, m, black_box(p)) };
53428        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53429        assert_eq_m256(r, e);
53430    }
53431
53432    #[simd_test(enable = "avx512f,avx512vl")]
53433    const fn test_mm256_maskz_loadu_ps() {
53434        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53435        let p = a.as_ptr();
53436        let m = 0b11001010;
53437        let r = unsafe { _mm256_maskz_loadu_ps(m, black_box(p)) };
53438        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53439        assert_eq_m256(r, e);
53440    }
53441
53442    #[simd_test(enable = "avx512f,avx512vl")]
53443    const fn test_mm256_mask_load_ps() {
53444        #[repr(align(32))]
53445        struct Align {
53446            data: [f32; 8], // 32 bytes
53447        }
53448        let src = _mm256_set1_ps(42.0);
53449        let a = Align {
53450            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53451        };
53452        let p = a.data.as_ptr();
53453        let m = 0b11001010;
53454        let r = unsafe { _mm256_mask_load_ps(src, m, black_box(p)) };
53455        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53456        assert_eq_m256(r, e);
53457    }
53458
53459    #[simd_test(enable = "avx512f,avx512vl")]
53460    const fn test_mm256_maskz_load_ps() {
53461        #[repr(align(32))]
53462        struct Align {
53463            data: [f32; 8], // 32 bytes
53464        }
53465        let a = Align {
53466            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53467        };
53468        let p = a.data.as_ptr();
53469        let m = 0b11001010;
53470        let r = unsafe { _mm256_maskz_load_ps(m, black_box(p)) };
53471        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53472        assert_eq_m256(r, e);
53473    }
53474
53475    #[simd_test(enable = "avx512f,avx512vl")]
53476    const fn test_mm256_mask_storeu_ps() {
53477        let mut r = [42_f32; 8];
53478        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53479        let m = 0b11001010;
53480        unsafe {
53481            _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
53482        }
53483        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53484        assert_eq_m256(unsafe { _mm256_loadu_ps(r.as_ptr()) }, e);
53485    }
53486
53487    #[simd_test(enable = "avx512f,avx512vl")]
53488    const fn test_mm256_mask_store_ps() {
53489        #[repr(align(32))]
53490        struct Align {
53491            data: [f32; 8],
53492        }
53493        let mut r = Align { data: [42.0; 8] };
53494        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53495        let m = 0b11001010;
53496        unsafe {
53497            _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
53498        }
53499        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53500        assert_eq_m256(unsafe { _mm256_load_ps(r.data.as_ptr()) }, e);
53501    }
53502
53503    #[simd_test(enable = "avx512f,avx512vl")]
53504    const fn test_mm256_mask_loadu_pd() {
53505        let src = _mm256_set1_pd(42.0);
53506        let a = &[1.0_f64, 2.0, 3.0, 4.0];
53507        let p = a.as_ptr();
53508        let m = 0b1010;
53509        let r = unsafe { _mm256_mask_loadu_pd(src, m, black_box(p)) };
53510        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53511        assert_eq_m256d(r, e);
53512    }
53513
53514    #[simd_test(enable = "avx512f,avx512vl")]
53515    const fn test_mm256_maskz_loadu_pd() {
53516        let a = &[1.0_f64, 2.0, 3.0, 4.0];
53517        let p = a.as_ptr();
53518        let m = 0b1010;
53519        let r = unsafe { _mm256_maskz_loadu_pd(m, black_box(p)) };
53520        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
53521        assert_eq_m256d(r, e);
53522    }
53523
53524    #[simd_test(enable = "avx512f,avx512vl")]
53525    const fn test_mm256_mask_load_pd() {
53526        #[repr(align(32))]
53527        struct Align {
53528            data: [f64; 4], // 32 bytes
53529        }
53530        let src = _mm256_set1_pd(42.0);
53531        let a = Align {
53532            data: [1.0_f64, 2.0, 3.0, 4.0],
53533        };
53534        let p = a.data.as_ptr();
53535        let m = 0b1010;
53536        let r = unsafe { _mm256_mask_load_pd(src, m, black_box(p)) };
53537        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53538        assert_eq_m256d(r, e);
53539    }
53540
53541    #[simd_test(enable = "avx512f,avx512vl")]
53542    const fn test_mm256_maskz_load_pd() {
53543        #[repr(align(32))]
53544        struct Align {
53545            data: [f64; 4], // 32 bytes
53546        }
53547        let a = Align {
53548            data: [1.0_f64, 2.0, 3.0, 4.0],
53549        };
53550        let p = a.data.as_ptr();
53551        let m = 0b1010;
53552        let r = unsafe { _mm256_maskz_load_pd(m, black_box(p)) };
53553        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
53554        assert_eq_m256d(r, e);
53555    }
53556
53557    #[simd_test(enable = "avx512f,avx512vl")]
53558    const fn test_mm256_mask_storeu_pd() {
53559        let mut r = [42_f64; 4];
53560        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
53561        let m = 0b1010;
53562        unsafe {
53563            _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
53564        }
53565        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53566        assert_eq_m256d(unsafe { _mm256_loadu_pd(r.as_ptr()) }, e);
53567    }
53568
53569    #[simd_test(enable = "avx512f,avx512vl")]
53570    const fn test_mm256_mask_store_pd() {
53571        #[repr(align(32))]
53572        struct Align {
53573            data: [f64; 4],
53574        }
53575        let mut r = Align { data: [42.0; 4] };
53576        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
53577        let m = 0b1010;
53578        unsafe {
53579            _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
53580        }
53581        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53582        assert_eq_m256d(unsafe { _mm256_load_pd(r.data.as_ptr()) }, e);
53583    }
53584
53585    #[simd_test(enable = "avx512f,avx512vl")]
53586    const fn test_mm_mask_loadu_epi32() {
53587        let src = _mm_set1_epi32(42);
53588        let a = &[1_i32, 2, 3, 4];
53589        let p = a.as_ptr();
53590        let m = 0b1010;
53591        let r = unsafe { _mm_mask_loadu_epi32(src, m, black_box(p)) };
53592        let e = _mm_setr_epi32(42, 2, 42, 4);
53593        assert_eq_m128i(r, e);
53594    }
53595
53596    #[simd_test(enable = "avx512f,avx512vl")]
53597    const fn test_mm_maskz_loadu_epi32() {
53598        let a = &[1_i32, 2, 3, 4];
53599        let p = a.as_ptr();
53600        let m = 0b1010;
53601        let r = unsafe { _mm_maskz_loadu_epi32(m, black_box(p)) };
53602        let e = _mm_setr_epi32(0, 2, 0, 4);
53603        assert_eq_m128i(r, e);
53604    }
53605
53606    #[simd_test(enable = "avx512f,avx512vl")]
53607    const fn test_mm_mask_load_epi32() {
53608        #[repr(align(16))]
53609        struct Align {
53610            data: [i32; 4], // 32 bytes
53611        }
53612        let src = _mm_set1_epi32(42);
53613        let a = Align {
53614            data: [1_i32, 2, 3, 4],
53615        };
53616        let p = a.data.as_ptr();
53617        let m = 0b1010;
53618        let r = unsafe { _mm_mask_load_epi32(src, m, black_box(p)) };
53619        let e = _mm_setr_epi32(42, 2, 42, 4);
53620        assert_eq_m128i(r, e);
53621    }
53622
53623    #[simd_test(enable = "avx512f,avx512vl")]
53624    const fn test_mm_maskz_load_epi32() {
53625        #[repr(align(16))]
53626        struct Align {
53627            data: [i32; 4], // 16 bytes
53628        }
53629        let a = Align {
53630            data: [1_i32, 2, 3, 4],
53631        };
53632        let p = a.data.as_ptr();
53633        let m = 0b1010;
53634        let r = unsafe { _mm_maskz_load_epi32(m, black_box(p)) };
53635        let e = _mm_setr_epi32(0, 2, 0, 4);
53636        assert_eq_m128i(r, e);
53637    }
53638
53639    #[simd_test(enable = "avx512f,avx512vl")]
53640    const fn test_mm_mask_storeu_epi32() {
53641        let mut r = [42_i32; 4];
53642        let a = _mm_setr_epi32(1, 2, 3, 4);
53643        let m = 0b1010;
53644        unsafe {
53645            _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
53646        }
53647        let e = _mm_setr_epi32(42, 2, 42, 4);
53648        assert_eq_m128i(unsafe { _mm_loadu_epi32(r.as_ptr()) }, e);
53649    }
53650
53651    #[simd_test(enable = "avx512f,avx512vl")]
53652    const fn test_mm_mask_store_epi32() {
53653        #[repr(align(16))]
53654        struct Align {
53655            data: [i32; 4], // 16 bytes
53656        }
53657        let mut r = Align { data: [42; 4] };
53658        let a = _mm_setr_epi32(1, 2, 3, 4);
53659        let m = 0b1010;
53660        unsafe {
53661            _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
53662        }
53663        let e = _mm_setr_epi32(42, 2, 42, 4);
53664        assert_eq_m128i(unsafe { _mm_load_epi32(r.data.as_ptr()) }, e);
53665    }
53666
53667    #[simd_test(enable = "avx512f,avx512vl")]
53668    const fn test_mm_mask_loadu_epi64() {
53669        let src = _mm_set1_epi64x(42);
53670        let a = &[1_i64, 2];
53671        let p = a.as_ptr();
53672        let m = 0b10;
53673        let r = unsafe { _mm_mask_loadu_epi64(src, m, black_box(p)) };
53674        let e = _mm_setr_epi64x(42, 2);
53675        assert_eq_m128i(r, e);
53676    }
53677
53678    #[simd_test(enable = "avx512f,avx512vl")]
53679    const fn test_mm_maskz_loadu_epi64() {
53680        let a = &[1_i64, 2];
53681        let p = a.as_ptr();
53682        let m = 0b10;
53683        let r = unsafe { _mm_maskz_loadu_epi64(m, black_box(p)) };
53684        let e = _mm_setr_epi64x(0, 2);
53685        assert_eq_m128i(r, e);
53686    }
53687
53688    #[simd_test(enable = "avx512f,avx512vl")]
53689    const fn test_mm_mask_load_epi64() {
53690        #[repr(align(16))]
53691        struct Align {
53692            data: [i64; 2], // 16 bytes
53693        }
53694        let src = _mm_set1_epi64x(42);
53695        let a = Align { data: [1_i64, 2] };
53696        let p = a.data.as_ptr();
53697        let m = 0b10;
53698        let r = unsafe { _mm_mask_load_epi64(src, m, black_box(p)) };
53699        let e = _mm_setr_epi64x(42, 2);
53700        assert_eq_m128i(r, e);
53701    }
53702
53703    #[simd_test(enable = "avx512f,avx512vl")]
53704    const fn test_mm_maskz_load_epi64() {
53705        #[repr(align(16))]
53706        struct Align {
53707            data: [i64; 2], // 16 bytes
53708        }
53709        let a = Align { data: [1_i64, 2] };
53710        let p = a.data.as_ptr();
53711        let m = 0b10;
53712        let r = unsafe { _mm_maskz_load_epi64(m, black_box(p)) };
53713        let e = _mm_setr_epi64x(0, 2);
53714        assert_eq_m128i(r, e);
53715    }
53716
53717    #[simd_test(enable = "avx512f,avx512vl")]
53718    const fn test_mm_mask_storeu_epi64() {
53719        let mut r = [42_i64; 2];
53720        let a = _mm_setr_epi64x(1, 2);
53721        let m = 0b10;
53722        unsafe {
53723            _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
53724        }
53725        let e = _mm_setr_epi64x(42, 2);
53726        assert_eq_m128i(unsafe { _mm_loadu_epi64(r.as_ptr()) }, e);
53727    }
53728
53729    #[simd_test(enable = "avx512f,avx512vl")]
53730    const fn test_mm_mask_store_epi64() {
53731        #[repr(align(16))]
53732        struct Align {
53733            data: [i64; 2], // 16 bytes
53734        }
53735        let mut r = Align { data: [42; 2] };
53736        let a = _mm_setr_epi64x(1, 2);
53737        let m = 0b10;
53738        unsafe {
53739            _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
53740        }
53741        let e = _mm_setr_epi64x(42, 2);
53742        assert_eq_m128i(unsafe { _mm_load_epi64(r.data.as_ptr()) }, e);
53743    }
53744
53745    #[simd_test(enable = "avx512f,avx512vl")]
53746    const fn test_mm_mask_loadu_ps() {
53747        let src = _mm_set1_ps(42.0);
53748        let a = &[1.0_f32, 2.0, 3.0, 4.0];
53749        let p = a.as_ptr();
53750        let m = 0b1010;
53751        let r = unsafe { _mm_mask_loadu_ps(src, m, black_box(p)) };
53752        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
53753        assert_eq_m128(r, e);
53754    }
53755
53756    #[simd_test(enable = "avx512f,avx512vl")]
53757    const fn test_mm_maskz_loadu_ps() {
53758        let a = &[1.0_f32, 2.0, 3.0, 4.0];
53759        let p = a.as_ptr();
53760        let m = 0b1010;
53761        let r = unsafe { _mm_maskz_loadu_ps(m, black_box(p)) };
53762        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
53763        assert_eq_m128(r, e);
53764    }
53765
53766    #[simd_test(enable = "avx512f,avx512vl")]
53767    const fn test_mm_mask_load_ps() {
53768        #[repr(align(16))]
53769        struct Align {
53770            data: [f32; 4], // 16 bytes
53771        }
53772        let src = _mm_set1_ps(42.0);
53773        let a = Align {
53774            data: [1.0_f32, 2.0, 3.0, 4.0],
53775        };
53776        let p = a.data.as_ptr();
53777        let m = 0b1010;
53778        let r = unsafe { _mm_mask_load_ps(src, m, black_box(p)) };
53779        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
53780        assert_eq_m128(r, e);
53781    }
53782
53783    #[simd_test(enable = "avx512f,avx512vl")]
53784    const fn test_mm_maskz_load_ps() {
53785        #[repr(align(16))]
53786        struct Align {
53787            data: [f32; 4], // 16 bytes
53788        }
53789        let a = Align {
53790            data: [1.0_f32, 2.0, 3.0, 4.0],
53791        };
53792        let p = a.data.as_ptr();
53793        let m = 0b1010;
53794        let r = unsafe { _mm_maskz_load_ps(m, black_box(p)) };
53795        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
53796        assert_eq_m128(r, e);
53797    }
53798
53799    #[simd_test(enable = "avx512f,avx512vl")]
53800    const fn test_mm_mask_storeu_ps() {
53801        let mut r = [42_f32; 4];
53802        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
53803        let m = 0b1010;
53804        unsafe {
53805            _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
53806        }
53807        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
53808        assert_eq_m128(unsafe { _mm_loadu_ps(r.as_ptr()) }, e);
53809    }
53810
53811    #[simd_test(enable = "avx512f,avx512vl")]
53812    const fn test_mm_mask_store_ps() {
53813        #[repr(align(16))]
53814        struct Align {
53815            data: [f32; 4], // 16 bytes
53816        }
53817        let mut r = Align { data: [42.0; 4] };
53818        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
53819        let m = 0b1010;
53820        unsafe {
53821            _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
53822        }
53823        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
53824        assert_eq_m128(unsafe { _mm_load_ps(r.data.as_ptr()) }, e);
53825    }
53826
53827    #[simd_test(enable = "avx512f,avx512vl")]
53828    const fn test_mm_mask_loadu_pd() {
53829        let src = _mm_set1_pd(42.0);
53830        let a = &[1.0_f64, 2.0];
53831        let p = a.as_ptr();
53832        let m = 0b10;
53833        let r = unsafe { _mm_mask_loadu_pd(src, m, black_box(p)) };
53834        let e = _mm_setr_pd(42.0, 2.0);
53835        assert_eq_m128d(r, e);
53836    }
53837
53838    #[simd_test(enable = "avx512f,avx512vl")]
53839    const fn test_mm_maskz_loadu_pd() {
53840        let a = &[1.0_f64, 2.0];
53841        let p = a.as_ptr();
53842        let m = 0b10;
53843        let r = unsafe { _mm_maskz_loadu_pd(m, black_box(p)) };
53844        let e = _mm_setr_pd(0.0, 2.0);
53845        assert_eq_m128d(r, e);
53846    }
53847
53848    #[simd_test(enable = "avx512f,avx512vl")]
53849    const fn test_mm_mask_load_pd() {
53850        #[repr(align(16))]
53851        struct Align {
53852            data: [f64; 2], // 16 bytes
53853        }
53854        let src = _mm_set1_pd(42.0);
53855        let a = Align {
53856            data: [1.0_f64, 2.0],
53857        };
53858        let p = a.data.as_ptr();
53859        let m = 0b10;
53860        let r = unsafe { _mm_mask_load_pd(src, m, black_box(p)) };
53861        let e = _mm_setr_pd(42.0, 2.0);
53862        assert_eq_m128d(r, e);
53863    }
53864
53865    #[simd_test(enable = "avx512f,avx512vl")]
53866    const fn test_mm_maskz_load_pd() {
53867        #[repr(align(16))]
53868        struct Align {
53869            data: [f64; 2], // 16 bytes
53870        }
53871        let a = Align {
53872            data: [1.0_f64, 2.0],
53873        };
53874        let p = a.data.as_ptr();
53875        let m = 0b10;
53876        let r = unsafe { _mm_maskz_load_pd(m, black_box(p)) };
53877        let e = _mm_setr_pd(0.0, 2.0);
53878        assert_eq_m128d(r, e);
53879    }
53880
53881    #[simd_test(enable = "avx512f")]
53882    fn test_mm_mask_load_ss() {
53883        #[repr(align(16))]
53884        struct Align {
53885            data: f32,
53886        }
53887        let src = _mm_set_ss(2.0);
53888        let mem = Align { data: 1.0 };
53889        let r = unsafe { _mm_mask_load_ss(src, 0b1, &mem.data) };
53890        assert_eq_m128(r, _mm_set_ss(1.0));
53891        let r = unsafe { _mm_mask_load_ss(src, 0b0, &mem.data) };
53892        assert_eq_m128(r, _mm_set_ss(2.0));
53893    }
53894
53895    #[simd_test(enable = "avx512f")]
53896    fn test_mm_maskz_load_ss() {
53897        #[repr(align(16))]
53898        struct Align {
53899            data: f32,
53900        }
53901        let mem = Align { data: 1.0 };
53902        let r = unsafe { _mm_maskz_load_ss(0b1, &mem.data) };
53903        assert_eq_m128(r, _mm_set_ss(1.0));
53904        let r = unsafe { _mm_maskz_load_ss(0b0, &mem.data) };
53905        assert_eq_m128(r, _mm_set_ss(0.0));
53906    }
53907
53908    #[simd_test(enable = "avx512f")]
53909    fn test_mm_mask_load_sd() {
53910        #[repr(align(16))]
53911        struct Align {
53912            data: f64,
53913        }
53914        let src = _mm_set_sd(2.0);
53915        let mem = Align { data: 1.0 };
53916        let r = unsafe { _mm_mask_load_sd(src, 0b1, &mem.data) };
53917        assert_eq_m128d(r, _mm_set_sd(1.0));
53918        let r = unsafe { _mm_mask_load_sd(src, 0b0, &mem.data) };
53919        assert_eq_m128d(r, _mm_set_sd(2.0));
53920    }
53921
53922    #[simd_test(enable = "avx512f")]
53923    fn test_mm_maskz_load_sd() {
53924        #[repr(align(16))]
53925        struct Align {
53926            data: f64,
53927        }
53928        let mem = Align { data: 1.0 };
53929        let r = unsafe { _mm_maskz_load_sd(0b1, &mem.data) };
53930        assert_eq_m128d(r, _mm_set_sd(1.0));
53931        let r = unsafe { _mm_maskz_load_sd(0b0, &mem.data) };
53932        assert_eq_m128d(r, _mm_set_sd(0.0));
53933    }
53934
53935    #[simd_test(enable = "avx512f,avx512vl")]
53936    const fn test_mm_mask_storeu_pd() {
53937        let mut r = [42_f64; 2];
53938        let a = _mm_setr_pd(1.0, 2.0);
53939        let m = 0b10;
53940        unsafe {
53941            _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
53942        }
53943        let e = _mm_setr_pd(42.0, 2.0);
53944        assert_eq_m128d(unsafe { _mm_loadu_pd(r.as_ptr()) }, e);
53945    }
53946
53947    #[simd_test(enable = "avx512f,avx512vl")]
53948    const fn test_mm_mask_store_pd() {
53949        #[repr(align(16))]
53950        struct Align {
53951            data: [f64; 2], // 16 bytes
53952        }
53953        let mut r = Align { data: [42.0; 2] };
53954        let a = _mm_setr_pd(1.0, 2.0);
53955        let m = 0b10;
53956        unsafe {
53957            _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
53958        }
53959        let e = _mm_setr_pd(42.0, 2.0);
53960        assert_eq_m128d(unsafe { _mm_load_pd(r.data.as_ptr()) }, e);
53961    }
53962
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_store_ss() {
        // Masked scalar-single store; the aligned store form requires a
        // 16-byte-aligned destination.
        #[repr(align(16))]
        struct Align {
            data: f32,
        }
        let a = _mm_set_ss(2.0);
        let mut mem = Align { data: 1.0 };
        // Mask bit set: the scalar is written to memory.
        unsafe {
            _mm_mask_store_ss(&mut mem.data, 0b1, a);
        }
        assert_eq!(mem.data, 2.0);
        // Mask bit clear: memory is left untouched (still 2.0 from above).
        unsafe {
            _mm_mask_store_ss(&mut mem.data, 0b0, a);
        }
        assert_eq!(mem.data, 2.0);
    }
53980
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_store_sd() {
        // Masked scalar-double store; the aligned store form requires a
        // 16-byte-aligned destination.
        #[repr(align(16))]
        struct Align {
            data: f64,
        }
        let a = _mm_set_sd(2.0);
        let mut mem = Align { data: 1.0 };
        // Mask bit set: the scalar is written to memory.
        unsafe {
            _mm_mask_store_sd(&mut mem.data, 0b1, a);
        }
        assert_eq!(mem.data, 2.0);
        // Mask bit clear: memory is left untouched (still 2.0 from above).
        unsafe {
            _mm_mask_store_sd(&mut mem.data, 0b0, a);
        }
        assert_eq!(mem.data, 2.0);
    }
53998
53999    #[simd_test(enable = "avx512f")]
54000    const fn test_mm512_setr_pd() {
54001        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
54002        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
54003    }
54004
54005    #[simd_test(enable = "avx512f")]
54006    const fn test_mm512_set_pd() {
54007        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
54008        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
54009    }
54010
54011    #[simd_test(enable = "avx512f")]
54012    const fn test_mm512_rol_epi32() {
54013        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54014        let r = _mm512_rol_epi32::<1>(a);
54015        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54016        assert_eq_m512i(r, e);
54017    }
54018
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_rol_epi32() {
        // Merge-masked rotate left: unselected lanes come from `src`.
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
54028
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_rol_epi32() {
        // Zero-masked rotate left. Mask bit i selects lane i, i.e. the *last*
        // argument of `_mm512_set_epi32` is lane 0.
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_rol_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes rotate (MSB wraps to 1 in lane 0), upper eight zero.
        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        assert_eq_m512i(r, e);
    }
54038
54039    #[simd_test(enable = "avx512f,avx512vl")]
54040    const fn test_mm256_rol_epi32() {
54041        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
54042        let r = _mm256_rol_epi32::<1>(a);
54043        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
54044        assert_eq_m256i(r, e);
54045    }
54046
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_rol_epi32() {
        // Merge-masked rotate left: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54056
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_rol_epi32() {
        // Zero-masked rotate left.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_rol_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane holds the rotated result.
        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54066
54067    #[simd_test(enable = "avx512f,avx512vl")]
54068    const fn test_mm_rol_epi32() {
54069        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
54070        let r = _mm_rol_epi32::<1>(a);
54071        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
54072        assert_eq_m128i(r, e);
54073    }
54074
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_rol_epi32() {
        // Merge-masked rotate left: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54084
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_rol_epi32() {
        // Zero-masked rotate left.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_rol_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54094
54095    #[simd_test(enable = "avx512f")]
54096    const fn test_mm512_ror_epi32() {
54097        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54098        let r = _mm512_ror_epi32::<1>(a);
54099        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54100        assert_eq_m512i(r, e);
54101    }
54102
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_ror_epi32() {
        // Merge-masked rotate right: unselected lanes come from `src`.
        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54112
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_ror_epi32() {
        // Zero-masked rotate right; lane 0 is the last `set_epi32` argument.
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_ror_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes rotate (bit 0 wraps to the MSB), upper eight zero.
        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        assert_eq_m512i(r, e);
    }
54122
54123    #[simd_test(enable = "avx512f,avx512vl")]
54124    const fn test_mm256_ror_epi32() {
54125        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
54126        let r = _mm256_ror_epi32::<1>(a);
54127        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
54128        assert_eq_m256i(r, e);
54129    }
54130
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_ror_epi32() {
        // Merge-masked rotate right: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
54140
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_ror_epi32() {
        // Zero-masked rotate right.
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_ror_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane holds the rotated result.
        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
54150
54151    #[simd_test(enable = "avx512f,avx512vl")]
54152    const fn test_mm_ror_epi32() {
54153        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
54154        let r = _mm_ror_epi32::<1>(a);
54155        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
54156        assert_eq_m128i(r, e);
54157    }
54158
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_ror_epi32() {
        // Merge-masked rotate right: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
54168
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_ror_epi32() {
        // Zero-masked rotate right.
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_ror_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
54178
54179    #[simd_test(enable = "avx512f")]
54180    const fn test_mm512_slli_epi32() {
54181        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54182        let r = _mm512_slli_epi32::<1>(a);
54183        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54184        assert_eq_m512i(r, e);
54185    }
54186
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_slli_epi32() {
        // Merge-masked logical shift left: unselected lanes come from `src`.
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        // Full mask: every lane shifts; the set MSB is discarded.
        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
54196
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_slli_epi32() {
        // Zero-masked logical shift left; lane 0 is the last argument.
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_slli_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes shift (lane 0 loses its MSB), upper eight zero.
        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
        assert_eq_m512i(r, e);
    }
54206
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_slli_epi32() {
        // Merge-masked logical shift left: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        // Full mask: every lane shifts; the set MSB is discarded.
        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54216
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_slli_epi32() {
        // Zero-masked logical shift left.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_slli_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane shifts; the set MSB is discarded.
        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54226
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_slli_epi32() {
        // Merge-masked logical shift left: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane shifts; the set MSB is discarded.
        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54236
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_slli_epi32() {
        // Zero-masked logical shift left.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_slli_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane shifts; the set MSB is discarded.
        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54246
54247    #[simd_test(enable = "avx512f")]
54248    const fn test_mm512_srli_epi32() {
54249        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54250        let r = _mm512_srli_epi32::<1>(a);
54251        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54252        assert_eq_m512i(r, e);
54253    }
54254
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srli_epi32() {
        // Merge-masked logical shift right: unselected lanes come from `src`.
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        // Full mask: every lane holds the shifted result.
        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54264
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srli_epi32() {
        // Zero-masked logical shift right; lane 0 is the last argument.
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_srli_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes shift, upper eight are zeroed by the mask.
        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
        assert_eq_m512i(r, e);
    }
54274
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srli_epi32() {
        // Merge-masked logical shift right: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        // Full mask: every lane holds the shifted result.
        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54284
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srli_epi32() {
        // Zero-masked logical shift right.
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_srli_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane holds the shifted result.
        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54294
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srli_epi32() {
        // Merge-masked logical shift right: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane holds the shifted result.
        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54304
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srli_epi32() {
        // Zero-masked logical shift right.
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_srli_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane holds the shifted result.
        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54314
54315    #[simd_test(enable = "avx512f")]
54316    const fn test_mm512_rolv_epi32() {
54317        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54318        let b = _mm512_set1_epi32(1);
54319        let r = _mm512_rolv_epi32(a, b);
54320        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54321        assert_eq_m512i(r, e);
54322    }
54323
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_rolv_epi32() {
        // Merge-masked variable rotate left: unselected lanes come from `src`.
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm512_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
54334
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_rolv_epi32() {
        // Zero-masked variable rotate left; lane 0 is the last argument.
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        let b = _mm512_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_rolv_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes rotate (MSB wraps to 1 in lane 0), upper eight zero.
        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        assert_eq_m512i(r, e);
    }
54345
54346    #[simd_test(enable = "avx512f,avx512vl")]
54347    const fn test_mm256_rolv_epi32() {
54348        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
54349        let b = _mm256_set1_epi32(1);
54350        let r = _mm256_rolv_epi32(a, b);
54351        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
54352        assert_eq_m256i(r, e);
54353    }
54354
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_rolv_epi32() {
        // Merge-masked variable rotate left: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm256_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54365
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_rolv_epi32() {
        // Zero-masked variable rotate left.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm256_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_rolv_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane holds the rotated result.
        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54376
54377    #[simd_test(enable = "avx512f,avx512vl")]
54378    const fn test_mm_rolv_epi32() {
54379        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
54380        let b = _mm_set1_epi32(1);
54381        let r = _mm_rolv_epi32(a, b);
54382        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
54383        assert_eq_m128i(r, e);
54384    }
54385
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_rolv_epi32() {
        // Merge-masked variable rotate left: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let b = _mm_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_rolv_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54396
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_rolv_epi32() {
        // Zero-masked variable rotate left.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let b = _mm_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_rolv_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54407
54408    #[simd_test(enable = "avx512f")]
54409    const fn test_mm512_rorv_epi32() {
54410        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54411        let b = _mm512_set1_epi32(1);
54412        let r = _mm512_rorv_epi32(a, b);
54413        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54414        assert_eq_m512i(r, e);
54415    }
54416
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_rorv_epi32() {
        // Merge-masked variable rotate right: unselected lanes come from `src`.
        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm512_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54427
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_rorv_epi32() {
        // Zero-masked variable rotate right; lane 0 is the last argument.
        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        let b = _mm512_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_rorv_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes rotate (lane 0: bit 0 wraps to the MSB), rest zero.
        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        assert_eq_m512i(r, e);
    }
54438
54439    #[simd_test(enable = "avx512f,avx512vl")]
54440    const fn test_mm256_rorv_epi32() {
54441        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
54442        let b = _mm256_set1_epi32(1);
54443        let r = _mm256_rorv_epi32(a, b);
54444        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
54445        assert_eq_m256i(r, e);
54446    }
54447
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_rorv_epi32() {
        // Merge-masked variable rotate right: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm256_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full mask: every lane holds the rotated result.
        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
54458
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_rorv_epi32() {
        // Zero-masked variable rotate right.
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm256_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_rorv_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane holds the rotated result.
        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
54469
54470    #[simd_test(enable = "avx512f,avx512vl")]
54471    const fn test_mm_rorv_epi32() {
54472        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
54473        let b = _mm_set1_epi32(1);
54474        let r = _mm_rorv_epi32(a, b);
54475        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
54476        assert_eq_m128i(r, e);
54477    }
54478
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_rorv_epi32() {
        // Merge-masked variable rotate right: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let b = _mm_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_rorv_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
54489
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_rorv_epi32() {
        // Zero-masked variable rotate right.
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let b = _mm_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_rorv_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane holds the rotated result.
        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
54500
54501    #[simd_test(enable = "avx512f")]
54502    const fn test_mm512_sllv_epi32() {
54503        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54504        let count = _mm512_set1_epi32(1);
54505        let r = _mm512_sllv_epi32(a, count);
54506        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54507        assert_eq_m512i(r, e);
54508    }
54509
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sllv_epi32() {
        // Merge-masked variable shift left: unselected lanes come from `src`.
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm512_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        // Full mask: every lane shifts; the set MSB is discarded.
        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
54520
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sllv_epi32() {
        // Zero-masked variable shift left; lane 0 is the last argument.
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        // Top lane's count is 0 (irrelevant: it is masked out below).
        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_sllv_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes shift (lane 0 loses its MSB), upper eight zero.
        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
        assert_eq_m512i(r, e);
    }
54531
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sllv_epi32() {
        // Merge-masked variable shift left: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        // Full mask: every lane shifts; the set MSB is discarded.
        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54542
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sllv_epi32() {
        // Zero-masked variable shift left.
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_sllv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane shifts; the set MSB is discarded.
        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54553
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sllv_epi32() {
        // Merge-masked variable shift left: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane shifts; the set MSB is discarded.
        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54564
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sllv_epi32() {
        // Zero-masked variable shift left.
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_sllv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane shifts; the set MSB is discarded.
        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54575
54576    #[simd_test(enable = "avx512f")]
54577    const fn test_mm512_srlv_epi32() {
54578        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
54579        let count = _mm512_set1_epi32(1);
54580        let r = _mm512_srlv_epi32(a, count);
54581        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
54582        assert_eq_m512i(r, e);
54583    }
54584
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srlv_epi32() {
        // Merge-masked variable shift right: unselected lanes come from `src`.
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        // Full mask: every lane holds the shifted result.
        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54595
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srlv_epi32() {
        // Zero-masked variable shift right; lane 0 is the last argument.
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
        // Top lane's count is 0 (irrelevant: it is masked out below).
        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_srlv_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes shift, upper eight are zeroed by the mask.
        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
        assert_eq_m512i(r, e);
    }
54606
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srlv_epi32() {
        // Merge-masked variable shift right: unselected lanes come from `src`.
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        // Full mask: every lane holds the shifted result.
        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54617
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srlv_epi32() {
        // Zero-masked variable shift right.
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm256_maskz_srlv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: every lane holds the shifted result.
        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54628
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srlv_epi32() {
        // Merge-masked variable shift right: unselected lanes come from `src`.
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        // Full mask (4 lanes): every lane holds the shifted result.
        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54639
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srlv_epi32() {
        // Zero-masked variable shift right.
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        // Mask 0: zero-masking clears every lane.
        let r = _mm_maskz_srlv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask (4 lanes): every lane holds the shifted result.
        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54650
54651    #[simd_test(enable = "avx512f")]
54652    fn test_mm512_sll_epi32() {
54653        #[rustfmt::skip]
54654        let a = _mm512_set_epi32(
54655            1 << 31, 1 << 0, 1 << 1, 1 << 2,
54656            0, 0, 0, 0,
54657            0, 0, 0, 0,
54658            0, 0, 0, 0,
54659        );
54660        let count = _mm_set_epi32(0, 0, 0, 2);
54661        let r = _mm512_sll_epi32(a, count);
54662        #[rustfmt::skip]
54663        let e = _mm512_set_epi32(
54664            0, 1 << 2, 1 << 3, 1 << 4,
54665            0, 0, 0, 0,
54666            0, 0, 0, 0,
54667            0, 0, 0, 0,
54668        );
54669        assert_eq_m512i(r, e);
54670    }
54671
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sll_epi32() {
        // Merge-masked uniform shift left: the scalar count lives in the low
        // 64 bits of `count`; unselected lanes come from `src`.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        // Mask 0: every lane is copied from `src` (here `a`).
        let r = _mm512_mask_sll_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        // Full mask: every lane shifts by 2; the set MSB is discarded.
        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
54694
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sll_epi32() {
        // Zero-masked uniform shift left: only the low 64 bits of `count`
        // determine the shift (here 2); the high elements of `count` are
        // ignored. Lane 0 is the last `set_epi32` argument.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        // Mask 0: zero-masking clears every lane.
        let r = _mm512_maskz_sll_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight lanes: lane 0's MSB is shifted out (-> 0), the rest were
        // already 0; upper eight lanes are zeroed by the mask.
        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54711
54712    #[simd_test(enable = "avx512f,avx512vl")]
54713    fn test_mm256_mask_sll_epi32() {
54714        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
54715        let count = _mm_set_epi32(0, 0, 0, 1);
54716        let r = _mm256_mask_sll_epi32(a, 0, a, count);
54717        assert_eq_m256i(r, a);
54718        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
54719        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
54720        assert_eq_m256i(r, e);
54721    }
54722
54723    #[simd_test(enable = "avx512f,avx512vl")]
54724    fn test_mm256_maskz_sll_epi32() {
54725        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
54726        let count = _mm_set_epi32(0, 0, 0, 1);
54727        let r = _mm256_maskz_sll_epi32(0, a, count);
54728        assert_eq_m256i(r, _mm256_setzero_si256());
54729        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
54730        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
54731        assert_eq_m256i(r, e);
54732    }
54733
54734    #[simd_test(enable = "avx512f,avx512vl")]
54735    fn test_mm_mask_sll_epi32() {
54736        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
54737        let count = _mm_set_epi32(0, 0, 0, 1);
54738        let r = _mm_mask_sll_epi32(a, 0, a, count);
54739        assert_eq_m128i(r, a);
54740        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
54741        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
54742        assert_eq_m128i(r, e);
54743    }
54744
54745    #[simd_test(enable = "avx512f,avx512vl")]
54746    fn test_mm_maskz_sll_epi32() {
54747        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
54748        let count = _mm_set_epi32(0, 0, 0, 1);
54749        let r = _mm_maskz_sll_epi32(0, a, count);
54750        assert_eq_m128i(r, _mm_setzero_si128());
54751        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
54752        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
54753        assert_eq_m128i(r, e);
54754    }
54755
    // `srl_epi32` family: logical (zero-fill) right shift of every i32 lane by
    // the scalar count in the low 64 bits of the `__m128i` operand.

    // Unmasked: 1 << 31 becomes 1 << 29, 1 << 2 becomes 1 << 0, and the single
    // bits 1 << 0 / 1 << 1 are shifted out to 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_srl_epi32(a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask matches unmasked.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_srl_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: only the low eight lanes survive; the bottom lane's
    // 1 << 31 shifted right by 2 yields 1 << 29, all other kept lanes are 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_srl_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask variant (requires AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_srl_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_srl_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_srl_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_srl_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54848
    // `sra_epi32` family: arithmetic (sign-extending) right shift by the scalar
    // count in the low 64 bits of the `__m128i` operand. Negative values round
    // toward negative infinity: -15 >> 2 == -4, -8 >> 2 == -2.

    // Unmasked: count's low 64 bits encode shift 2 (the leading `1` element of
    // _mm_set_epi32(1, 0, 0, 2) lies in the ignored upper 64 bits); 1 >> 2 == 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm_set_epi32(1, 0, 0, 2);
        let r = _mm512_sra_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask shifts every lane
    // (16 >> 2 == 4 in the bottom lane).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_sra_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: only the low eight lanes survive; -15 >> 2 and
    // -14 >> 2 both round down to -4.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_sra_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask variant (requires AVX512VL); positive input, so the
    // arithmetic shift behaves like a logical one here.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sra_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_sra_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sra_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_sra_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sra_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_sra_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sra_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_sra_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54923
    // `srav_epi32` family: per-lane arithmetic right shift — each lane of `a`
    // is shifted by the corresponding lane of `count`.
    // NOTE(review): these tests are `const fn` — presumably so `#[simd_test]`
    // can also exercise the intrinsics' const evaluation; confirm against the
    // test macro's handling of const fns.

    // Unmasked: top four lanes shift by 2; the bottom lane's count is 0, so
    // its value 1 passes through unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm512_srav_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask shifts each lane
    // by its own count (bottom lane: 16 >> 1 == 8).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let r = _mm512_mask_srav_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: only the low eight lanes survive; -15 >> 2 and
    // -14 >> 2 both round toward negative infinity, giving -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
        let r = _mm512_maskz_srav_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask variant (requires AVX512VL): uniform shift of 1.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srav_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srav_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srav_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srav_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54998
    // `srai_epi32` family: arithmetic right shift by a compile-time immediate
    // (the const generic parameter). Negative lanes round toward negative
    // infinity: -15 >> 2 == -4, while 15 >> 2 == 3.

    // Unmasked shift by immediate 2.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
        let r = _mm512_srai_epi32::<2>(a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask shifts every lane
    // (note the asymmetry 15 >> 2 == 3 vs. -15 >> 2 == -4).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: the low-8 mask zeroes the upper lanes (including the
    // shifted 8 and -8) and keeps the shifted bottom lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_maskz_srai_epi32::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask variant (requires AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_maskz_srai_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_maskz_srai_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
55066
    // `permute_ps` family: shuffle f32 elements within each 128-bit lane using
    // a compile-time imm8; 0b11_11_11_11 replicates element 3 of every lane.

    // Unmasked: element 3 of each 128-bit lane (3., 7., 11., 15. in setr order)
    // is broadcast across that lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask matches unmasked.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask variant: mask 0 zeroes everything; all-ones mask matches unmasked.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit writemask variant (requires AVX512VL). `a` uses `set` (high lane
    // first), so element 3 of each lane is 4. (low half) and 0. (high half).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }

    // 128-bit writemask variant: element 3 of set_ps(0., 1., 2., 3.) is 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }
55146
    // `permutevar_epi32`: full-width variable permute of `a` by per-lane
    // indices. With `set_epi32(0..15)` the bottom lane holds 15, so index 1
    // selects the value 14 for every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutevar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: mask 0 returns `src` (`a`); all-ones mask broadcasts
    // element 1 (value 14) to every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55166
    // `permutevar_ps` family: in-lane variable shuffle — each f32 is replaced
    // by the element of its own 128-bit lane selected by the low bits of `b`.
    // Index 0b01 picks element 1 of each lane: 14., 10., 6., 2. (low to high)
    // for the 512-bit `set_ps(0. .. 15.)` input.

    // Unmasked 512-bit variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_permutevar_ps(a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask matches unmasked.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask variant: the low-8 mask keeps only the bottom eight shuffled
    // lanes (10s and 14s) and zeroes the rest.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_maskz_permutevar_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit writemask variant (requires AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_maskz_permutevar_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_maskz_permutevar_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }
55253
    // `permutexvar_epi32` family: full-width (cross-lane) permute of `a` by
    // per-lane indices. Index 1 selects element 1 of the whole vector:
    // value 14 for the 512-bit input, value 6 for the 256-bit input.

    // Unmasked 512-bit variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutexvar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask matches unmasked.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    // Zeromask variant: only the low eight lanes keep the broadcast 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
        assert_eq_m512i(r, e);
    }

    // 256-bit unmasked variant (requires AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_permutexvar_epi32(idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    // 256-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55315
    // `permutexvar_ps` family: full-width (cross-lane) permute of f32 lanes by
    // per-lane integer indices. Index 1 selects element 1 of the whole vector:
    // 14. for the 512-bit input, 6. for the 256-bit input.

    // Unmasked 512-bit variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permutexvar_ps(idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }

    // Writemask variant: mask 0 returns `src`; all-ones mask matches unmasked.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }

    // Zeromask variant: only the low eight lanes keep the broadcast 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit unmasked variant (requires AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_permutexvar_ps(idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

    // 256-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }
55385
55386    #[simd_test(enable = "avx512f")]
55387    fn test_mm512_permutex2var_epi32() {
55388        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
55389        #[rustfmt::skip]
55390        let idx = _mm512_set_epi32(
55391            1, 1 << 4, 2, 1 << 4,
55392            3, 1 << 4, 4, 1 << 4,
55393            5, 1 << 4, 6, 1 << 4,
55394            7, 1 << 4, 8, 1 << 4,
55395        );
55396        let b = _mm512_set1_epi32(100);
55397        let r = _mm512_permutex2var_epi32(a, idx, b);
55398        let e = _mm512_set_epi32(
55399            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
55400        );
55401        assert_eq_m512i(r, e);
55402    }
55403
    // Writemask form: mask 0 must leave `a` unchanged; a full mask applies the
    // two-source permute to every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
        let e = _mm512_set_epi32(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }
55423
    // Zeromask form: mask 0 clears every lane; the half mask keeps only the
    // low 8 permuted lanes (mask bit i maps to lane i) and zeroes the rest.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
        assert_eq_m512i(r, e);
    }
55441
    // `mask2` form: unselected lanes copy `idx` (not `a`) — the out-of-range
    // 1000/2000/... idx entries in the high half make that visible in the result.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask2_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1000, 1 << 4, 2000, 1 << 4,
            3000, 1 << 4, 4000, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        // mask 0: result is idx itself
        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        // low-half mask: low 8 lanes permuted, high 8 keep their idx values
        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1000, 1 << 4, 2000, 1 << 4,
            3000, 1 << 4, 4000, 1 << 4,
            10, 100, 9, 100,
            8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }
55465
    // 256-bit variant: 8 lanes, so bit 3 (`1 << 3`) of each index selects from `b`.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_permutex2var_epi32(a, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }
55475
    // Writemask form (256-bit): mask 0 keeps `a`; full mask applies the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }
55487
    // Zeromask form (256-bit): mask 0 zeroes everything; full mask gives the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }
55499
    // `mask2` form (256-bit): mask 0 copies `idx` into the result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask2_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }
55511
    // 128-bit variant: 4 lanes, so bit 2 (`1 << 2`) of each index selects from `b`.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_permutex2var_epi32(a, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }
55521
    // Writemask form (128-bit): mask 0 keeps `a`; full 4-bit mask applies the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }
55533
    // Zeromask form (128-bit): mask 0 zeroes everything; full mask gives the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }
55545
    // `mask2` form (128-bit): mask 0 copies `idx` into the result.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask2_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }
55557
    // Float counterpart of the epi32 test: same idx pattern, bit 4 selects from `b` (100.).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_permutex2var_ps(a, idx, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }
55577
    // Writemask form: mask 0 keeps `a`; full mask applies the two-source permute.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }
55599
    // Zeromask form: mask 0 zeroes all lanes; the half mask keeps only the low 8 lanes.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }
55621
    // `mask2` form: mask 0 yields `idx` (bit-cast to float lanes for the comparison).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask2_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m512(r, _mm512_castsi512_ps(idx));
        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }
55643
    // 256-bit float variant: bit 3 of each index selects from `b` (100.).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_permutex2var_ps(a, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }
55653
    // Writemask form (256-bit float): mask 0 keeps `a`; full mask applies the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }
55665
    // Zeromask form (256-bit float): mask 0 zeroes everything; full mask gives the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }
55677
    // `mask2` form (256-bit float): mask 0 yields `idx` bit-cast to floats.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask2_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m256(r, _mm256_castsi256_ps(idx));
        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }
55689
    // 128-bit float variant: bit 2 of each index selects from `b` (100.).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_permutex2var_ps(a, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }
55699
    // Writemask form (128-bit float): mask 0 keeps `a`; full 4-bit mask applies the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }
55711
    // Zeromask form (128-bit float): mask 0 zeroes everything; full mask gives the permute.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }
55723
    // `mask2` form (128-bit float): mask 0 yields `idx` bit-cast to floats.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask2_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m128(r, _mm_castsi128_ps(idx));
        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }
55735
    // `_MM_PERM_AADD` picks (within every 128-bit lane) elements D,D,A,A of the
    // `setr` order; the same 4-element pattern repeats across all four lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m512i(r, e);
    }
55743
    // Writemask form: mask 0 keeps `a`; full mask gives the per-lane AADD shuffle.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m512i(r, e);
    }
55753
    // Zeromask form: the half mask keeps the first eight (low) `setr` lanes
    // shuffled and zeroes the upper eight.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
55763
    // 256-bit writemask shuffle: mask 0 keeps `a`; full mask applies AADD per 128-bit lane.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_epi32() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m256i(r, e);
    }
55773
    // 256-bit zeromask shuffle: mask 0 zeroes everything; full mask gives the AADD shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_epi32() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m256i(r, e);
    }
55783
    // 128-bit writemask shuffle: mask 0 keeps `a`; full 4-bit mask applies AADD.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_shuffle_epi32() {
        let a = _mm_set_epi32(1, 4, 5, 8);
        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
        let e = _mm_set_epi32(8, 8, 1, 1);
        assert_eq_m128i(r, e);
    }
55793
    // 128-bit zeromask shuffle: mask 0 zeroes everything; full mask gives the AADD shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_shuffle_epi32() {
        let a = _mm_set_epi32(1, 4, 5, 8);
        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
        let e = _mm_set_epi32(8, 8, 1, 1);
        assert_eq_m128i(r, e);
    }
55803
    // Per 128-bit lane: the low two result slots take `a`'s element 3 (imm 0b11),
    // the high two take `b`'s element 0 (imm 0b00).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
        );
        assert_eq_m512(r, e);
    }
55818
    // Writemask form: mask 0 keeps `a`; full mask gives the shuffled result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
        );
        assert_eq_m512(r, e);
    }
55835
    // Zeromask form: the half mask keeps only the first eight (low) lanes shuffled.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
55852
    // 256-bit writemask form: with mask 0 the imm is irrelevant (result is `a`);
    // the full-mask call uses 0b00_00_11_11 and checks the shuffled values.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_ps() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
        assert_eq_m256(r, e);
    }
55863
    // 256-bit zeromask form: mask 0 zeroes everything; full mask gives the shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_ps() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
        assert_eq_m256(r, e);
    }
55874
    // 128-bit writemask form: mask 0 keeps `a`; full 4-bit mask applies 0b00_00_11_11.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_shuffle_ps() {
        let a = _mm_set_ps(1., 4., 5., 8.);
        let b = _mm_set_ps(2., 3., 6., 7.);
        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
        let e = _mm_set_ps(7., 7., 1., 1.);
        assert_eq_m128(r, e);
    }
55885
    // 128-bit zeromask form: mask 0 zeroes everything; full mask gives the shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_shuffle_ps() {
        let a = _mm_set_ps(1., 4., 5., 8.);
        let b = _mm_set_ps(2., 3., 6., 7.);
        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
        let e = _mm_set_ps(7., 7., 1., 1.);
        assert_eq_m128(r, e);
    }
55896
    // imm 0 selects 128-bit lane 0 for every output slot: the low two output
    // lanes come from `a`'s lane 0, the high two from `b`'s lane 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
        assert_eq_m512i(r, e);
    }
55905
    // Writemask form: mask 0 keeps `a`; full mask gives the lane shuffle.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
        assert_eq_m512i(r, e);
    }
55916
    // Zeromask form: the half mask keeps only the first eight (low) elements.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
55927
    // 256-bit variant: imm 0 selects lane 0 of `a` for the low half of the
    // result and lane 0 of `b` for the high half.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }
55936
    // 256-bit writemask form: mask 0 keeps `a`; full mask gives the lane shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }
55947
    // 256-bit zeromask form: mask 0 zeroes everything; full mask gives the shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }
55958
    // Float counterpart of shuffle_i32x4: imm 0 replicates lane 0 of `a` into
    // the low half and lane 0 of `b` into the high half.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
        );
        assert_eq_m512(r, e);
    }
55973
    // Writemask form: mask 0 keeps `a`; full mask gives the lane shuffle.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
        );
        assert_eq_m512(r, e);
    }
55990
    // Zeromask form: the half mask keeps only the first eight (low) lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
56007
    // 256-bit float variant: lane 0 of `a` -> low half, lane 0 of `b` -> high half.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }
56016
    // 256-bit writemask form: mask 0 keeps `a`; full mask gives the lane shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }
56027
    // 256-bit zeromask form: mask 0 zeroes everything; full mask gives the shuffle.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }
56038
    // Extract 128-bit lane 1 (setr elements 4..7 == 5,6,7,8).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_extractf32x4_ps::<1>(a);
        let e = _mm_setr_ps(5., 6., 7., 8.);
        assert_eq_m128(r, e);
    }
56048
    // Masked extract: mask 0 keeps `src` (all 100.); full 4-bit mask yields lane 1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let src = _mm_set1_ps(100.);
        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
        let e = _mm_setr_ps(5., 6., 7., 8.);
        assert_eq_m128(r, e);
    }
56061
    // Zeromask extract: mask 0 zeroes everything; mask bit 0 alone keeps only
    // element 0 of the extracted lane (5.) and zeroes the other three.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
        let e = _mm_setr_ps(5., 0., 0., 0.);
        assert_eq_m128(r, e);
    }
56073
    // Extract the upper 128-bit lane of a 256-bit vector (the first four `set_ps` args).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_extractf32x4_ps::<1>(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
56081
    // Masked extract (256-bit): mask 0 keeps `src`; full mask yields the upper lane.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let src = _mm_set1_ps(100.);
        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
56092
    // Zeromask extract (256-bit): mask 0 zeroes everything; full mask yields the upper lane.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
56102
    // Integer counterpart of extractf32x4: 128-bit lane 1 is setr elements 4..7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_extracti32x4_epi32::<1>(a);
        let e = _mm_setr_epi32(5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
56110
    // Masked extract: mask 0 keeps `src` (all 100); full 4-bit mask yields lane 1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi32(100);
        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
        let e = _mm_setr_epi32(5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
56121
56122    #[simd_test(enable = "avx512f,avx512vl")]
56123    const fn test_mm512_maskz_extracti32x4_epi32() {
56124        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56125        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
56126        assert_eq_m128i(r, _mm_setzero_si128());
56127        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
56128        let e = _mm_setr_epi32(5, 0, 0, 0);
56129        assert_eq_m128i(r, e);
56130    }
56131
56132    #[simd_test(enable = "avx512f,avx512vl")]
56133    const fn test_mm256_extracti32x4_epi32() {
56134        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56135        let r = _mm256_extracti32x4_epi32::<1>(a);
56136        let e = _mm_set_epi32(1, 2, 3, 4);
56137        assert_eq_m128i(r, e);
56138    }
56139
56140    #[simd_test(enable = "avx512f,avx512vl")]
56141    const fn test_mm256_mask_extracti32x4_epi32() {
56142        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56143        let src = _mm_set1_epi32(100);
56144        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
56145        assert_eq_m128i(r, src);
56146        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
56147        let e = _mm_set_epi32(1, 2, 3, 4);
56148        assert_eq_m128i(r, e);
56149    }
56150
56151    #[simd_test(enable = "avx512f,avx512vl")]
56152    const fn test_mm256_maskz_extracti32x4_epi32() {
56153        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56154        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
56155        assert_eq_m128i(r, _mm_setzero_si128());
56156        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
56157        let e = _mm_set_epi32(1, 2, 3, 4);
56158        assert_eq_m128i(r, e);
56159    }
56160
56161    #[simd_test(enable = "avx512f")]
56162    const fn test_mm512_moveldup_ps() {
56163        let a = _mm512_setr_ps(
56164            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56165        );
56166        let r = _mm512_moveldup_ps(a);
56167        let e = _mm512_setr_ps(
56168            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
56169        );
56170        assert_eq_m512(r, e);
56171    }
56172
56173    #[simd_test(enable = "avx512f")]
56174    const fn test_mm512_mask_moveldup_ps() {
56175        let a = _mm512_setr_ps(
56176            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56177        );
56178        let r = _mm512_mask_moveldup_ps(a, 0, a);
56179        assert_eq_m512(r, a);
56180        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
56181        let e = _mm512_setr_ps(
56182            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
56183        );
56184        assert_eq_m512(r, e);
56185    }
56186
56187    #[simd_test(enable = "avx512f")]
56188    const fn test_mm512_maskz_moveldup_ps() {
56189        let a = _mm512_setr_ps(
56190            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56191        );
56192        let r = _mm512_maskz_moveldup_ps(0, a);
56193        assert_eq_m512(r, _mm512_setzero_ps());
56194        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
56195        let e = _mm512_setr_ps(
56196            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
56197        );
56198        assert_eq_m512(r, e);
56199    }
56200
56201    #[simd_test(enable = "avx512f,avx512vl")]
56202    const fn test_mm256_mask_moveldup_ps() {
56203        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56204        let r = _mm256_mask_moveldup_ps(a, 0, a);
56205        assert_eq_m256(r, a);
56206        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
56207        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
56208        assert_eq_m256(r, e);
56209    }
56210
56211    #[simd_test(enable = "avx512f,avx512vl")]
56212    const fn test_mm256_maskz_moveldup_ps() {
56213        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56214        let r = _mm256_maskz_moveldup_ps(0, a);
56215        assert_eq_m256(r, _mm256_setzero_ps());
56216        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
56217        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
56218        assert_eq_m256(r, e);
56219    }
56220
56221    #[simd_test(enable = "avx512f,avx512vl")]
56222    const fn test_mm_mask_moveldup_ps() {
56223        let a = _mm_set_ps(1., 2., 3., 4.);
56224        let r = _mm_mask_moveldup_ps(a, 0, a);
56225        assert_eq_m128(r, a);
56226        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
56227        let e = _mm_set_ps(2., 2., 4., 4.);
56228        assert_eq_m128(r, e);
56229    }
56230
56231    #[simd_test(enable = "avx512f,avx512vl")]
56232    const fn test_mm_maskz_moveldup_ps() {
56233        let a = _mm_set_ps(1., 2., 3., 4.);
56234        let r = _mm_maskz_moveldup_ps(0, a);
56235        assert_eq_m128(r, _mm_setzero_ps());
56236        let r = _mm_maskz_moveldup_ps(0b00001111, a);
56237        let e = _mm_set_ps(2., 2., 4., 4.);
56238        assert_eq_m128(r, e);
56239    }
56240
56241    #[simd_test(enable = "avx512f")]
56242    const fn test_mm512_movehdup_ps() {
56243        let a = _mm512_setr_ps(
56244            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56245        );
56246        let r = _mm512_movehdup_ps(a);
56247        let e = _mm512_setr_ps(
56248            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
56249        );
56250        assert_eq_m512(r, e);
56251    }
56252
56253    #[simd_test(enable = "avx512f")]
56254    const fn test_mm512_mask_movehdup_ps() {
56255        let a = _mm512_setr_ps(
56256            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56257        );
56258        let r = _mm512_mask_movehdup_ps(a, 0, a);
56259        assert_eq_m512(r, a);
56260        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
56261        let e = _mm512_setr_ps(
56262            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
56263        );
56264        assert_eq_m512(r, e);
56265    }
56266
56267    #[simd_test(enable = "avx512f")]
56268    const fn test_mm512_maskz_movehdup_ps() {
56269        let a = _mm512_setr_ps(
56270            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56271        );
56272        let r = _mm512_maskz_movehdup_ps(0, a);
56273        assert_eq_m512(r, _mm512_setzero_ps());
56274        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
56275        let e = _mm512_setr_ps(
56276            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
56277        );
56278        assert_eq_m512(r, e);
56279    }
56280
56281    #[simd_test(enable = "avx512f,avx512vl")]
56282    const fn test_mm256_mask_movehdup_ps() {
56283        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56284        let r = _mm256_mask_movehdup_ps(a, 0, a);
56285        assert_eq_m256(r, a);
56286        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
56287        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
56288        assert_eq_m256(r, e);
56289    }
56290
56291    #[simd_test(enable = "avx512f,avx512vl")]
56292    const fn test_mm256_maskz_movehdup_ps() {
56293        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56294        let r = _mm256_maskz_movehdup_ps(0, a);
56295        assert_eq_m256(r, _mm256_setzero_ps());
56296        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
56297        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
56298        assert_eq_m256(r, e);
56299    }
56300
56301    #[simd_test(enable = "avx512f,avx512vl")]
56302    const fn test_mm_mask_movehdup_ps() {
56303        let a = _mm_set_ps(1., 2., 3., 4.);
56304        let r = _mm_mask_movehdup_ps(a, 0, a);
56305        assert_eq_m128(r, a);
56306        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
56307        let e = _mm_set_ps(1., 1., 3., 3.);
56308        assert_eq_m128(r, e);
56309    }
56310
56311    #[simd_test(enable = "avx512f,avx512vl")]
56312    const fn test_mm_maskz_movehdup_ps() {
56313        let a = _mm_set_ps(1., 2., 3., 4.);
56314        let r = _mm_maskz_movehdup_ps(0, a);
56315        assert_eq_m128(r, _mm_setzero_ps());
56316        let r = _mm_maskz_movehdup_ps(0b00001111, a);
56317        let e = _mm_set_ps(1., 1., 3., 3.);
56318        assert_eq_m128(r, e);
56319    }
56320
56321    #[simd_test(enable = "avx512f")]
56322    const fn test_mm512_inserti32x4() {
56323        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56324        let b = _mm_setr_epi32(17, 18, 19, 20);
56325        let r = _mm512_inserti32x4::<0>(a, b);
56326        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56327        assert_eq_m512i(r, e);
56328    }
56329
56330    #[simd_test(enable = "avx512f")]
56331    const fn test_mm512_mask_inserti32x4() {
56332        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56333        let b = _mm_setr_epi32(17, 18, 19, 20);
56334        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
56335        assert_eq_m512i(r, a);
56336        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
56337        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56338        assert_eq_m512i(r, e);
56339    }
56340
56341    #[simd_test(enable = "avx512f")]
56342    const fn test_mm512_maskz_inserti32x4() {
56343        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56344        let b = _mm_setr_epi32(17, 18, 19, 20);
56345        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
56346        assert_eq_m512i(r, _mm512_setzero_si512());
56347        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
56348        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
56349        assert_eq_m512i(r, e);
56350    }
56351
56352    #[simd_test(enable = "avx512f,avx512vl")]
56353    const fn test_mm256_inserti32x4() {
56354        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56355        let b = _mm_set_epi32(17, 18, 19, 20);
56356        let r = _mm256_inserti32x4::<1>(a, b);
56357        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56358        assert_eq_m256i(r, e);
56359    }
56360
56361    #[simd_test(enable = "avx512f,avx512vl")]
56362    const fn test_mm256_mask_inserti32x4() {
56363        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56364        let b = _mm_set_epi32(17, 18, 19, 20);
56365        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
56366        assert_eq_m256i(r, a);
56367        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
56368        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56369        assert_eq_m256i(r, e);
56370    }
56371
56372    #[simd_test(enable = "avx512f,avx512vl")]
56373    const fn test_mm256_maskz_inserti32x4() {
56374        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56375        let b = _mm_set_epi32(17, 18, 19, 20);
56376        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
56377        assert_eq_m256i(r, _mm256_setzero_si256());
56378        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
56379        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56380        assert_eq_m256i(r, e);
56381    }
56382
56383    #[simd_test(enable = "avx512f")]
56384    const fn test_mm512_insertf32x4() {
56385        let a = _mm512_setr_ps(
56386            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56387        );
56388        let b = _mm_setr_ps(17., 18., 19., 20.);
56389        let r = _mm512_insertf32x4::<0>(a, b);
56390        let e = _mm512_setr_ps(
56391            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56392        );
56393        assert_eq_m512(r, e);
56394    }
56395
56396    #[simd_test(enable = "avx512f")]
56397    const fn test_mm512_mask_insertf32x4() {
56398        let a = _mm512_setr_ps(
56399            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56400        );
56401        let b = _mm_setr_ps(17., 18., 19., 20.);
56402        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
56403        assert_eq_m512(r, a);
56404        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
56405        let e = _mm512_setr_ps(
56406            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56407        );
56408        assert_eq_m512(r, e);
56409    }
56410
56411    #[simd_test(enable = "avx512f")]
56412    const fn test_mm512_maskz_insertf32x4() {
56413        let a = _mm512_setr_ps(
56414            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56415        );
56416        let b = _mm_setr_ps(17., 18., 19., 20.);
56417        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
56418        assert_eq_m512(r, _mm512_setzero_ps());
56419        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
56420        let e = _mm512_setr_ps(
56421            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
56422        );
56423        assert_eq_m512(r, e);
56424    }
56425
56426    #[simd_test(enable = "avx512f,avx512vl")]
56427    const fn test_mm256_insertf32x4() {
56428        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56429        let b = _mm_set_ps(17., 18., 19., 20.);
56430        let r = _mm256_insertf32x4::<1>(a, b);
56431        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56432        assert_eq_m256(r, e);
56433    }
56434
56435    #[simd_test(enable = "avx512f,avx512vl")]
56436    const fn test_mm256_mask_insertf32x4() {
56437        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56438        let b = _mm_set_ps(17., 18., 19., 20.);
56439        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
56440        assert_eq_m256(r, a);
56441        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
56442        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56443        assert_eq_m256(r, e);
56444    }
56445
56446    #[simd_test(enable = "avx512f,avx512vl")]
56447    const fn test_mm256_maskz_insertf32x4() {
56448        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56449        let b = _mm_set_ps(17., 18., 19., 20.);
56450        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
56451        assert_eq_m256(r, _mm256_setzero_ps());
56452        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
56453        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56454        assert_eq_m256(r, e);
56455    }
56456
56457    #[simd_test(enable = "avx512f")]
56458    const fn test_mm512_castps128_ps512() {
56459        let a = _mm_setr_ps(17., 18., 19., 20.);
56460        let r = _mm512_castps128_ps512(a);
56461        assert_eq_m128(_mm512_castps512_ps128(r), a);
56462    }
56463
56464    #[simd_test(enable = "avx512f")]
56465    const fn test_mm512_castps256_ps512() {
56466        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56467        let r = _mm512_castps256_ps512(a);
56468        assert_eq_m256(_mm512_castps512_ps256(r), a);
56469    }
56470
56471    #[simd_test(enable = "avx512f")]
56472    const fn test_mm512_zextps128_ps512() {
56473        let a = _mm_setr_ps(17., 18., 19., 20.);
56474        let r = _mm512_zextps128_ps512(a);
56475        let e = _mm512_setr_ps(
56476            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
56477        );
56478        assert_eq_m512(r, e);
56479    }
56480
56481    #[simd_test(enable = "avx512f")]
56482    const fn test_mm512_zextps256_ps512() {
56483        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56484        let r = _mm512_zextps256_ps512(a);
56485        let e = _mm512_setr_ps(
56486            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
56487        );
56488        assert_eq_m512(r, e);
56489    }
56490
56491    #[simd_test(enable = "avx512f")]
56492    const fn test_mm512_castps512_ps128() {
56493        let a = _mm512_setr_ps(
56494            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
56495        );
56496        let r = _mm512_castps512_ps128(a);
56497        let e = _mm_setr_ps(17., 18., 19., 20.);
56498        assert_eq_m128(r, e);
56499    }
56500
56501    #[simd_test(enable = "avx512f")]
56502    const fn test_mm512_castps512_ps256() {
56503        let a = _mm512_setr_ps(
56504            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
56505        );
56506        let r = _mm512_castps512_ps256(a);
56507        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56508        assert_eq_m256(r, e);
56509    }
56510
56511    #[simd_test(enable = "avx512f")]
56512    const fn test_mm512_castps_pd() {
56513        let a = _mm512_set1_ps(1.);
56514        let r = _mm512_castps_pd(a);
56515        let e = _mm512_set1_pd(0.007812501848093234);
56516        assert_eq_m512d(r, e);
56517    }
56518
56519    #[simd_test(enable = "avx512f")]
56520    const fn test_mm512_castps_si512() {
56521        let a = _mm512_set1_ps(1.);
56522        let r = _mm512_castps_si512(a);
56523        let e = _mm512_set1_epi32(1065353216);
56524        assert_eq_m512i(r, e);
56525    }
56526
56527    #[simd_test(enable = "avx512f")]
56528    const fn test_mm512_broadcastd_epi32() {
56529        let a = _mm_set_epi32(17, 18, 19, 20);
56530        let r = _mm512_broadcastd_epi32(a);
56531        let e = _mm512_set1_epi32(20);
56532        assert_eq_m512i(r, e);
56533    }
56534
56535    #[simd_test(enable = "avx512f")]
56536    const fn test_mm512_mask_broadcastd_epi32() {
56537        let src = _mm512_set1_epi32(20);
56538        let a = _mm_set_epi32(17, 18, 19, 20);
56539        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
56540        assert_eq_m512i(r, src);
56541        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
56542        let e = _mm512_set1_epi32(20);
56543        assert_eq_m512i(r, e);
56544    }
56545
56546    #[simd_test(enable = "avx512f")]
56547    const fn test_mm512_maskz_broadcastd_epi32() {
56548        let a = _mm_set_epi32(17, 18, 19, 20);
56549        let r = _mm512_maskz_broadcastd_epi32(0, a);
56550        assert_eq_m512i(r, _mm512_setzero_si512());
56551        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
56552        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
56553        assert_eq_m512i(r, e);
56554    }
56555
56556    #[simd_test(enable = "avx512f,avx512vl")]
56557    const fn test_mm256_mask_broadcastd_epi32() {
56558        let src = _mm256_set1_epi32(20);
56559        let a = _mm_set_epi32(17, 18, 19, 20);
56560        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
56561        assert_eq_m256i(r, src);
56562        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
56563        let e = _mm256_set1_epi32(20);
56564        assert_eq_m256i(r, e);
56565    }
56566
56567    #[simd_test(enable = "avx512f,avx512vl")]
56568    const fn test_mm256_maskz_broadcastd_epi32() {
56569        let a = _mm_set_epi32(17, 18, 19, 20);
56570        let r = _mm256_maskz_broadcastd_epi32(0, a);
56571        assert_eq_m256i(r, _mm256_setzero_si256());
56572        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
56573        let e = _mm256_set1_epi32(20);
56574        assert_eq_m256i(r, e);
56575    }
56576
56577    #[simd_test(enable = "avx512f,avx512vl")]
56578    const fn test_mm_mask_broadcastd_epi32() {
56579        let src = _mm_set1_epi32(20);
56580        let a = _mm_set_epi32(17, 18, 19, 20);
56581        let r = _mm_mask_broadcastd_epi32(src, 0, a);
56582        assert_eq_m128i(r, src);
56583        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
56584        let e = _mm_set1_epi32(20);
56585        assert_eq_m128i(r, e);
56586    }
56587
56588    #[simd_test(enable = "avx512f,avx512vl")]
56589    const fn test_mm_maskz_broadcastd_epi32() {
56590        let a = _mm_set_epi32(17, 18, 19, 20);
56591        let r = _mm_maskz_broadcastd_epi32(0, a);
56592        assert_eq_m128i(r, _mm_setzero_si128());
56593        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
56594        let e = _mm_set1_epi32(20);
56595        assert_eq_m128i(r, e);
56596    }
56597
56598    #[simd_test(enable = "avx512f")]
56599    const fn test_mm512_broadcastss_ps() {
56600        let a = _mm_set_ps(17., 18., 19., 20.);
56601        let r = _mm512_broadcastss_ps(a);
56602        let e = _mm512_set1_ps(20.);
56603        assert_eq_m512(r, e);
56604    }
56605
56606    #[simd_test(enable = "avx512f")]
56607    const fn test_mm512_mask_broadcastss_ps() {
56608        let src = _mm512_set1_ps(20.);
56609        let a = _mm_set_ps(17., 18., 19., 20.);
56610        let r = _mm512_mask_broadcastss_ps(src, 0, a);
56611        assert_eq_m512(r, src);
56612        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
56613        let e = _mm512_set1_ps(20.);
56614        assert_eq_m512(r, e);
56615    }
56616
56617    #[simd_test(enable = "avx512f")]
56618    const fn test_mm512_maskz_broadcastss_ps() {
56619        let a = _mm_set_ps(17., 18., 19., 20.);
56620        let r = _mm512_maskz_broadcastss_ps(0, a);
56621        assert_eq_m512(r, _mm512_setzero_ps());
56622        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
56623        let e = _mm512_setr_ps(
56624            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
56625        );
56626        assert_eq_m512(r, e);
56627    }
56628
56629    #[simd_test(enable = "avx512f,avx512vl")]
56630    const fn test_mm256_mask_broadcastss_ps() {
56631        let src = _mm256_set1_ps(20.);
56632        let a = _mm_set_ps(17., 18., 19., 20.);
56633        let r = _mm256_mask_broadcastss_ps(src, 0, a);
56634        assert_eq_m256(r, src);
56635        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
56636        let e = _mm256_set1_ps(20.);
56637        assert_eq_m256(r, e);
56638    }
56639
56640    #[simd_test(enable = "avx512f,avx512vl")]
56641    const fn test_mm256_maskz_broadcastss_ps() {
56642        let a = _mm_set_ps(17., 18., 19., 20.);
56643        let r = _mm256_maskz_broadcastss_ps(0, a);
56644        assert_eq_m256(r, _mm256_setzero_ps());
56645        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
56646        let e = _mm256_set1_ps(20.);
56647        assert_eq_m256(r, e);
56648    }
56649
56650    #[simd_test(enable = "avx512f,avx512vl")]
56651    const fn test_mm_mask_broadcastss_ps() {
56652        let src = _mm_set1_ps(20.);
56653        let a = _mm_set_ps(17., 18., 19., 20.);
56654        let r = _mm_mask_broadcastss_ps(src, 0, a);
56655        assert_eq_m128(r, src);
56656        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
56657        let e = _mm_set1_ps(20.);
56658        assert_eq_m128(r, e);
56659    }
56660
56661    #[simd_test(enable = "avx512f,avx512vl")]
56662    const fn test_mm_maskz_broadcastss_ps() {
56663        let a = _mm_set_ps(17., 18., 19., 20.);
56664        let r = _mm_maskz_broadcastss_ps(0, a);
56665        assert_eq_m128(r, _mm_setzero_ps());
56666        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
56667        let e = _mm_set1_ps(20.);
56668        assert_eq_m128(r, e);
56669    }
56670
56671    #[simd_test(enable = "avx512f")]
56672    const fn test_mm512_broadcast_i32x4() {
56673        let a = _mm_set_epi32(17, 18, 19, 20);
56674        let r = _mm512_broadcast_i32x4(a);
56675        let e = _mm512_set_epi32(
56676            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
56677        );
56678        assert_eq_m512i(r, e);
56679    }
56680
56681    #[simd_test(enable = "avx512f")]
56682    const fn test_mm512_mask_broadcast_i32x4() {
56683        let src = _mm512_set1_epi32(20);
56684        let a = _mm_set_epi32(17, 18, 19, 20);
56685        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
56686        assert_eq_m512i(r, src);
56687        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
56688        let e = _mm512_set_epi32(
56689            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
56690        );
56691        assert_eq_m512i(r, e);
56692    }
56693
56694    #[simd_test(enable = "avx512f")]
56695    const fn test_mm512_maskz_broadcast_i32x4() {
56696        let a = _mm_set_epi32(17, 18, 19, 20);
56697        let r = _mm512_maskz_broadcast_i32x4(0, a);
56698        assert_eq_m512i(r, _mm512_setzero_si512());
56699        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
56700        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
56701        assert_eq_m512i(r, e);
56702    }
56703
56704    #[simd_test(enable = "avx512f,avx512vl")]
56705    const fn test_mm256_broadcast_i32x4() {
56706        let a = _mm_set_epi32(17, 18, 19, 20);
56707        let r = _mm256_broadcast_i32x4(a);
56708        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56709        assert_eq_m256i(r, e);
56710    }
56711
56712    #[simd_test(enable = "avx512f,avx512vl")]
56713    const fn test_mm256_mask_broadcast_i32x4() {
56714        let src = _mm256_set1_epi32(20);
56715        let a = _mm_set_epi32(17, 18, 19, 20);
56716        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
56717        assert_eq_m256i(r, src);
56718        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
56719        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56720        assert_eq_m256i(r, e);
56721    }
56722
56723    #[simd_test(enable = "avx512f,avx512vl")]
56724    const fn test_mm256_maskz_broadcast_i32x4() {
56725        let a = _mm_set_epi32(17, 18, 19, 20);
56726        let r = _mm256_maskz_broadcast_i32x4(0, a);
56727        assert_eq_m256i(r, _mm256_setzero_si256());
56728        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
56729        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56730        assert_eq_m256i(r, e);
56731    }
56732
56733    #[simd_test(enable = "avx512f")]
56734    const fn test_mm512_broadcast_f32x4() {
56735        let a = _mm_set_ps(17., 18., 19., 20.);
56736        let r = _mm512_broadcast_f32x4(a);
56737        let e = _mm512_set_ps(
56738            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
56739        );
56740        assert_eq_m512(r, e);
56741    }
56742
56743    #[simd_test(enable = "avx512f")]
56744    const fn test_mm512_mask_broadcast_f32x4() {
56745        let src = _mm512_set1_ps(20.);
56746        let a = _mm_set_ps(17., 18., 19., 20.);
56747        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
56748        assert_eq_m512(r, src);
56749        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
56750        let e = _mm512_set_ps(
56751            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
56752        );
56753        assert_eq_m512(r, e);
56754    }
56755
56756    #[simd_test(enable = "avx512f")]
56757    const fn test_mm512_maskz_broadcast_f32x4() {
56758        let a = _mm_set_ps(17., 18., 19., 20.);
56759        let r = _mm512_maskz_broadcast_f32x4(0, a);
56760        assert_eq_m512(r, _mm512_setzero_ps());
56761        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
56762        let e = _mm512_set_ps(
56763            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
56764        );
56765        assert_eq_m512(r, e);
56766    }
56767
56768    #[simd_test(enable = "avx512f,avx512vl")]
56769    const fn test_mm256_broadcast_f32x4() {
56770        let a = _mm_set_ps(17., 18., 19., 20.);
56771        let r = _mm256_broadcast_f32x4(a);
56772        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56773        assert_eq_m256(r, e);
56774    }
56775
56776    #[simd_test(enable = "avx512f,avx512vl")]
56777    const fn test_mm256_mask_broadcast_f32x4() {
56778        let src = _mm256_set1_ps(20.);
56779        let a = _mm_set_ps(17., 18., 19., 20.);
56780        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
56781        assert_eq_m256(r, src);
56782        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
56783        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56784        assert_eq_m256(r, e);
56785    }
56786
56787    #[simd_test(enable = "avx512f,avx512vl")]
56788    const fn test_mm256_maskz_broadcast_f32x4() {
56789        let a = _mm_set_ps(17., 18., 19., 20.);
56790        let r = _mm256_maskz_broadcast_f32x4(0, a);
56791        assert_eq_m256(r, _mm256_setzero_ps());
56792        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
56793        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56794        assert_eq_m256(r, e);
56795    }
56796
56797    #[simd_test(enable = "avx512f")]
56798    const fn test_mm512_mask_blend_epi32() {
56799        let a = _mm512_set1_epi32(1);
56800        let b = _mm512_set1_epi32(2);
56801        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
56802        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
56803        assert_eq_m512i(r, e);
56804    }
56805
56806    #[simd_test(enable = "avx512f,avx512vl")]
56807    const fn test_mm256_mask_blend_epi32() {
56808        let a = _mm256_set1_epi32(1);
56809        let b = _mm256_set1_epi32(2);
56810        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
56811        let e = _mm256_set1_epi32(2);
56812        assert_eq_m256i(r, e);
56813    }
56814
56815    #[simd_test(enable = "avx512f,avx512vl")]
56816    const fn test_mm_mask_blend_epi32() {
56817        let a = _mm_set1_epi32(1);
56818        let b = _mm_set1_epi32(2);
56819        let r = _mm_mask_blend_epi32(0b00001111, a, b);
56820        let e = _mm_set1_epi32(2);
56821        assert_eq_m128i(r, e);
56822    }
56823
56824    #[simd_test(enable = "avx512f")]
56825    const fn test_mm512_mask_blend_ps() {
56826        let a = _mm512_set1_ps(1.);
56827        let b = _mm512_set1_ps(2.);
56828        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
56829        let e = _mm512_set_ps(
56830            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
56831        );
56832        assert_eq_m512(r, e);
56833    }
56834
56835    #[simd_test(enable = "avx512f,avx512vl")]
56836    const fn test_mm256_mask_blend_ps() {
56837        let a = _mm256_set1_ps(1.);
56838        let b = _mm256_set1_ps(2.);
56839        let r = _mm256_mask_blend_ps(0b11111111, a, b);
56840        let e = _mm256_set1_ps(2.);
56841        assert_eq_m256(r, e);
56842    }
56843
56844    #[simd_test(enable = "avx512f,avx512vl")]
56845    const fn test_mm_mask_blend_ps() {
56846        let a = _mm_set1_ps(1.);
56847        let b = _mm_set1_ps(2.);
56848        let r = _mm_mask_blend_ps(0b00001111, a, b);
56849        let e = _mm_set1_ps(2.);
56850        assert_eq_m128(r, e);
56851    }
56852
    // unpackhi interleaves the upper-half elements of each 128-bit lane of
    // `a` and `b`; `e` is the hand-computed reference for these inputs.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpackhi_epi32(a, b);
        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    // Writemask: a zero mask must pass `src` (here `a`) through unchanged; an
    // all-ones mask must reproduce the unmasked unpackhi result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    // Zeromask: cleared mask bits zero the lane; here only the low eight
    // lanes of the unpackhi result survive.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask variant (avx512vl): zero mask keeps `src`, full mask
    // gives the plain unpackhi result.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpackhi_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(17, 1, 18, 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpackhi_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(17, 1, 18, 2);
        assert_eq_m128i(r, e);
    }
56933
    // Float unpackhi mirrors the epi32 tests above: interleave the upper-half
    // lanes of each 128-bit lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpackhi_ps(a, b);
        let e = _mm512_set_ps(
            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask: zero mask keeps `src`, full mask matches the unmasked result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask: only the low eight lanes survive with mask 0x00FF.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpackhi_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit writemask variant (avx512vl).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpackhi_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpackhi_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }
57026
    // unpacklo interleaves the lower-half elements of each 128-bit lane of
    // `a` and `b`; `e` is the hand-computed reference for these inputs.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpacklo_epi32(a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // Writemask: zero mask keeps `src`, full mask matches the unmasked result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // Zeromask: only the low eight lanes survive with mask 0x00FF.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask variant (avx512vl).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }
57107
    // Float unpacklo mirrors the epi32 tests above: interleave the lower-half
    // lanes of each 128-bit lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpacklo_ps(a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Writemask: zero mask keeps `src`, full mask matches the unmasked result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Zeromask: only the low eight lanes survive with mask 0x00FF.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpacklo_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit writemask variant (avx512vl).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpacklo_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpacklo_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }
57200
    // valign: concatenates `a`:`b` and shifts right by IMM 32-bit elements.
    // Shift 0 (and a full-width shift of 16) returns `b`; shift 1 pulls the
    // lowest element of `a` in at the top.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_alignr_epi32::<0>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<16>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<1>(a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    // Writemask: zero mask keeps `src`, full mask matches the unmasked result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    // Zeromask: only the low eight lanes survive with mask 0x00FF.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    // 256-bit variant (avx512vl): 8-element concatenate-and-shift.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_alignr_epi32::<0>(a, b);
        assert_eq_m256i(r, b);
        let r = _mm256_alignr_epi32::<1>(a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 256-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 128-bit variant: 4-element concatenate-and-shift.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_alignr_epi32::<0>(a, b);
        assert_eq_m128i(r, b);
        let r = _mm_alignr_epi32::<1>(a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    // 128-bit writemask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }
57311
    // Bitwise AND: only the highest and lowest lanes carry bits; they share
    // bit 1 and bit 3 respectively, so those are all that survive.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    // Writemask: bit 15 is cleared in 0b01111111_11111111, so the highest
    // lane keeps its `src` value (`1 << 1 | 1 << 2`) instead of the AND.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    // Zeromask: only the low eight lanes survive; lane 0 keeps the AND result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }
57384
57385    #[simd_test(enable = "avx512f,avx512vl")]
57386    const fn test_mm256_mask_and_epi32() {
57387        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57388        let b = _mm256_set1_epi32(1 << 1);
57389        let r = _mm256_mask_and_epi32(a, 0, a, b);
57390        assert_eq_m256i(r, a);
57391        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
57392        let e = _mm256_set1_epi32(1 << 1);
57393        assert_eq_m256i(r, e);
57394    }
57395
57396    #[simd_test(enable = "avx512f,avx512vl")]
57397    const fn test_mm256_maskz_and_epi32() {
57398        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57399        let b = _mm256_set1_epi32(1 << 1);
57400        let r = _mm256_maskz_and_epi32(0, a, b);
57401        assert_eq_m256i(r, _mm256_setzero_si256());
57402        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
57403        let e = _mm256_set1_epi32(1 << 1);
57404        assert_eq_m256i(r, e);
57405    }
57406
57407    #[simd_test(enable = "avx512f,avx512vl")]
57408    const fn test_mm_mask_and_epi32() {
57409        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57410        let b = _mm_set1_epi32(1 << 1);
57411        let r = _mm_mask_and_epi32(a, 0, a, b);
57412        assert_eq_m128i(r, a);
57413        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
57414        let e = _mm_set1_epi32(1 << 1);
57415        assert_eq_m128i(r, e);
57416    }
57417
57418    #[simd_test(enable = "avx512f,avx512vl")]
57419    const fn test_mm_maskz_and_epi32() {
57420        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57421        let b = _mm_set1_epi32(1 << 1);
57422        let r = _mm_maskz_and_epi32(0, a, b);
57423        assert_eq_m128i(r, _mm_setzero_si128());
57424        let r = _mm_maskz_and_epi32(0b00001111, a, b);
57425        let e = _mm_set1_epi32(1 << 1);
57426        assert_eq_m128i(r, e);
57427    }
57428
    // `_mm512_and_si512` is the whole-register AND; same fixture and expected
    // result as `test_mm512_and_epi32`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    // Bitwise OR: the highest lane unions to bits 1|2, the lowest to 1|3|4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Writemask OR: zero mask keeps `src`; full mask matches the unmasked OR.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Zeromask OR: with mask 0x00FF only the low eight lanes survive; the
    // highest lane (and its 1|2 bits) is zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57534
57535    #[simd_test(enable = "avx512f,avx512vl")]
57536    const fn test_mm256_or_epi32() {
57537        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57538        let b = _mm256_set1_epi32(1 << 1);
57539        let r = _mm256_or_epi32(a, b);
57540        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
57541        assert_eq_m256i(r, e);
57542    }
57543
57544    #[simd_test(enable = "avx512f,avx512vl")]
57545    const fn test_mm256_mask_or_epi32() {
57546        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57547        let b = _mm256_set1_epi32(1 << 1);
57548        let r = _mm256_mask_or_epi32(a, 0, a, b);
57549        assert_eq_m256i(r, a);
57550        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
57551        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
57552        assert_eq_m256i(r, e);
57553    }
57554
57555    #[simd_test(enable = "avx512f,avx512vl")]
57556    const fn test_mm256_maskz_or_epi32() {
57557        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57558        let b = _mm256_set1_epi32(1 << 1);
57559        let r = _mm256_maskz_or_epi32(0, a, b);
57560        assert_eq_m256i(r, _mm256_setzero_si256());
57561        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
57562        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
57563        assert_eq_m256i(r, e);
57564    }
57565
57566    #[simd_test(enable = "avx512f,avx512vl")]
57567    const fn test_mm_or_epi32() {
57568        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57569        let b = _mm_set1_epi32(1 << 1);
57570        let r = _mm_or_epi32(a, b);
57571        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
57572        assert_eq_m128i(r, e);
57573    }
57574
57575    #[simd_test(enable = "avx512f,avx512vl")]
57576    const fn test_mm_mask_or_epi32() {
57577        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57578        let b = _mm_set1_epi32(1 << 1);
57579        let r = _mm_mask_or_epi32(a, 0, a, b);
57580        assert_eq_m128i(r, a);
57581        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
57582        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
57583        assert_eq_m128i(r, e);
57584    }
57585
57586    #[simd_test(enable = "avx512f,avx512vl")]
57587    const fn test_mm_maskz_or_epi32() {
57588        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57589        let b = _mm_set1_epi32(1 << 1);
57590        let r = _mm_maskz_or_epi32(0, a, b);
57591        assert_eq_m128i(r, _mm_setzero_si128());
57592        let r = _mm_maskz_or_epi32(0b00001111, a, b);
57593        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
57594        assert_eq_m128i(r, e);
57595    }
57596
57597    #[simd_test(enable = "avx512f")]
57598    const fn test_mm512_or_si512() {
57599        #[rustfmt::skip]
57600        let a = _mm512_set_epi32(
57601            1 << 1 | 1 << 2, 0, 0, 0,
57602            0, 0, 0, 0,
57603            0, 0, 0, 0,
57604            0, 0, 0, 1 << 1 | 1 << 3,
57605        );
57606        #[rustfmt::skip]
57607        let b = _mm512_set_epi32(
57608            1 << 1, 0, 0, 0,
57609            0, 0, 0, 0,
57610            0, 0, 0, 0,
57611            0, 0, 0, 1 << 3 | 1 << 4,
57612        );
57613        let r = _mm512_or_si512(a, b);
57614        #[rustfmt::skip]
57615        let e = _mm512_set_epi32(
57616            1 << 1 | 1 << 2, 0, 0, 0,
57617            0, 0, 0, 0,
57618            0, 0, 0, 0,
57619            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
57620        );
57621        assert_eq_m512i(r, e);
57622    }
57623
    // Per-lane 512-bit XOR: shared bits (1<<1 in the top lane, 1<<3 in the
    // bottom lane) must cancel; disjoint bits survive.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57650
    // Write-masked 512-bit XOR: mask 0 passes `src` (here `a`) through
    // untouched; a 15-bit mask leaves the highest lane copied from `src`
    // (hence 1<<1 | 1<<2 there) and XORs the rest.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_xor_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57679
    // Zero-masked 512-bit XOR: mask 0 yields the zero vector; a low 8-bit mask
    // XORs only lane 0 (the last `set_epi32` argument) and zeroes the rest.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_xor_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
        assert_eq_m512i(r, e);
    }
57702
    // AVX512VL 256-bit XOR: shared bit 1<<1 cancels, leaving 1<<2 everywhere.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_xor_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // Write-masked 256-bit XOR: mask 0 returns `src` (`a`); full 8-lane mask
    // XORs every lane.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_xor_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit XOR: mask 0 zeroes all lanes; full mask XORs all.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_xor_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // AVX512VL 128-bit XOR: same cancellation pattern as the 256-bit case.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_xor_epi32(a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // Write-masked 128-bit XOR: mask 0 returns `src` (`a`); full 4-lane mask
    // XORs every lane.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_xor_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit XOR: mask 0 zeroes all lanes; full mask XORs all.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_xor_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }
57764
    // Whole-register 512-bit XOR (`_si512` alias): same inputs and expected
    // values as `test_mm512_xor_epi32` above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_xor_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57791
    // ANDNOT computes (!a) & b: with a == 0 the complement is all-ones, so
    // the result is just b.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_andnot_epi32() {
        let a = _mm512_set1_epi32(0);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_andnot_epi32(a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // Write-masked ANDNOT: mask 0 returns `src` (`a`); full mask yields
    // (!a) & b == b because a and b have disjoint bits.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // Zero-masked ANDNOT: only the low 8 lanes (last 8 `set_epi32` args) are
    // computed; the rest are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_maskz_andnot_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // 256-bit write-masked ANDNOT, same disjoint-bits setup as above.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked ANDNOT: mask 0 zeroes; full mask gives b.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_maskz_andnot_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    // 128-bit write-masked ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_maskz_andnot_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }
57872
    // __mmask16 -> u32 conversion preserves the bit pattern unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_cvtmask16_u32() {
        let a: __mmask16 = 0b11001100_00110011;
        let r = _cvtmask16_u32(a);
        let e: u32 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // u32 -> __mmask16 conversion preserves the (low 16) bit pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_cvtu32_mask16() {
        let a: u32 = 0b11001100_00110011;
        let r = _cvtu32_mask16(a);
        let e: __mmask16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // kand with identical operands is the identity.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kand() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b11001100_00110011;
        let r = _mm512_kand(a, b);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // `_kand_mask16` alias of `_mm512_kand`.
    #[simd_test(enable = "avx512f")]
    const fn test_kand_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b11001100_00110011;
        let r = _kand_mask16(a, b);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // Bitwise OR of two 16-bit masks.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kor(a, b);
        let e: u16 = 0b11101110_00111011;
        assert_eq!(r, e);
    }

    // `_kor_mask16` alias of `_mm512_kor`.
    #[simd_test(enable = "avx512f")]
    const fn test_kor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kor_mask16(a, b);
        let e: u16 = 0b11101110_00111011;
        assert_eq!(r, e);
    }

    // Bitwise XOR of two 16-bit masks.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kxor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kxor(a, b);
        let e: u16 = 0b11100010_00111000;
        assert_eq!(r, e);
    }

    // `_kxor_mask16` alias of `_mm512_kxor`.
    #[simd_test(enable = "avx512f")]
    const fn test_kxor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kxor_mask16(a, b);
        let e: u16 = 0b11100010_00111000;
        assert_eq!(r, e);
    }

    // Bitwise NOT of a 16-bit mask.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_knot() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_knot(a);
        let e: u16 = 0b00110011_11001100;
        assert_eq!(r, e);
    }

    // `_knot_mask16` alias of `_mm512_knot`.
    #[simd_test(enable = "avx512f")]
    const fn test_knot_mask16() {
        let a: u16 = 0b11001100_00110011;
        let r = _knot_mask16(a);
        let e: u16 = 0b00110011_11001100;
        assert_eq!(r, e);
    }

    // kandn computes (!a) & b on 16-bit masks.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kandn() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kandn(a, b);
        let e: u16 = 0b00100010_00001000;
        assert_eq!(r, e);
    }

    // `_kandn_mask16` alias of `_mm512_kandn`.
    #[simd_test(enable = "avx512f")]
    const fn test_kandn_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kandn_mask16(a, b);
        let e: u16 = 0b00100010_00001000;
        assert_eq!(r, e);
    }

    // kxnor computes !(a ^ b) on 16-bit masks.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kxnor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kxnor(a, b);
        let e: u16 = 0b00011101_11000111;
        assert_eq!(r, e);
    }

    // `_kxnor_mask16` alias of `_mm512_kxnor`.
    #[simd_test(enable = "avx512f")]
    const fn test_kxnor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kxnor_mask16(a, b);
        let e: u16 = 0b00011101_11000111;
        assert_eq!(r, e);
    }
57994
    // a | b is all-ones but non-zero: the combined kortest reports ZF=0 (r)
    // and CF=1 (all_ones).
    #[simd_test(enable = "avx512f")]
    const fn test_kortest_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let mut all_ones: u8 = 0;
        let r = unsafe { _kortest_mask16_u8(a, b, &mut all_ones) };
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    // Carry-flag variant: a | b == 0xFFFF, so the result is 1.
    #[simd_test(enable = "avx512f")]
    const fn test_kortestc_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let r = _kortestc_mask16_u8(a, b);
        assert_eq!(r, 1);
    }

    // Zero-flag variant: a | b != 0, so the result is 0.
    #[simd_test(enable = "avx512f")]
    const fn test_kortestz_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let r = _kortestz_mask16_u8(a, b);
        assert_eq!(r, 0);
    }
58020
    // Left-shift of a 16-bit mask, including the boundary cases where the
    // shift count equals or exceeds the mask width (result must be 0, not UB).
    #[simd_test(enable = "avx512f")]
    const fn test_kshiftli_mask16() {
        let a: __mmask16 = 0b1001011011000011;
        let r = _kshiftli_mask16::<3>(a);
        let e: __mmask16 = 0b1011011000011000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<15>(a);
        let e: __mmask16 = 0b1000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<16>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<17>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);
    }

    // Right-shift counterpart, with the same >= width boundary cases.
    #[simd_test(enable = "avx512f")]
    const fn test_kshiftri_mask16() {
        let a: __mmask16 = 0b1010100100111100;
        let r = _kshiftri_mask16::<3>(a);
        let e: __mmask16 = 0b0001010100100111;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<15>(a);
        let e: __mmask16 = 0b0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<16>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<17>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);
    }
58060
    // Loading a mask through a pointer round-trips the bit pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_load_mask16() {
        let a: __mmask16 = 0b1001011011000011;
        let r = unsafe { _load_mask16(&a) };
        let e: __mmask16 = 0b1001011011000011;
        assert_eq!(r, e);
    }

    // Storing a mask through a pointer round-trips the bit pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_store_mask16() {
        let a: __mmask16 = 0b0110100100111100;
        let mut r = 0;
        unsafe {
            _store_mask16(&mut r, a);
        }
        let e: __mmask16 = 0b0110100100111100;
        assert_eq!(r, e);
    }

    // kmov is a mask-register copy: output equals input.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kmov() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_kmov(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // i32 -> mask conversion preserves the low 16 bits.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_int2mask() {
        let a: i32 = 0b11001100_00110011;
        let r = _mm512_int2mask(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // mask -> i32 conversion zero-extends the 16-bit mask.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask2int() {
        let k1: __mmask16 = 0b11001100_00110011;
        let r = _mm512_mask2int(k1);
        let e: i32 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    // kunpackb packs the low byte of `a` into the high byte of the result and
    // the low byte of `b` into the low byte.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kunpackb() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kunpackb(a, b);
        let e: u16 = 0b00110011_00001011;
        assert_eq!(r, e);
    }

    // kortestc: 1 only when a | b is all-ones.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kortestc() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestc(a, b);
        assert_eq!(r, 0);
        let b: u16 = 0b11111111_11111111;
        let r = _mm512_kortestc(a, b);
        assert_eq!(r, 1);
    }

    // kortestz: 1 only when a | b is zero.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kortestz() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestz(a, b);
        assert_eq!(r, 0);
        let r = _mm512_kortestz(0, 0);
        assert_eq!(r, 1);
    }
58133
    // `test` sets a mask bit when (a & b) is non-zero in that lane; here every
    // lane shares bit 0, so all 16 bits are set.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_test_epi32_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    // Masked `test`: mask 0 forces a zero result; full mask matches the
    // unmasked case.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    // 256-bit `test`: all 8 lanes intersect in bit 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_test_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    // 256-bit masked `test`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    // 128-bit `test`: only the low 4 mask bits are meaningful.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_test_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    // 128-bit masked `test`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }
58193
    // `testn` sets a mask bit when (a & b) is ZERO; a and b share bit 0 in
    // every lane, so no bit is set.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi32_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
58202
58203    #[simd_test(enable = "avx512f")]
58204    const fn test_mm512_mask_testn_epi32_mask() {
58205        let a = _mm512_set1_epi32(1 << 0);
58206        let b = _mm512_set1_epi32(1 << 1);
58207        let r = _mm512_mask_test_epi32_mask(0, a, b);
58208        assert_eq!(r, 0);
58209        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
58210        let e: __mmask16 = 0b11111111_11111111;
58211        assert_eq!(r, e);
58212    }
58213
    // 256-bit `testn`: a and b have disjoint bits, so all 8 bits are set.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }
58222
58223    #[simd_test(enable = "avx512f,avx512vl")]
58224    const fn test_mm256_mask_testn_epi32_mask() {
58225        let a = _mm256_set1_epi32(1 << 0);
58226        let b = _mm256_set1_epi32(1 << 1);
58227        let r = _mm256_mask_test_epi32_mask(0, a, b);
58228        assert_eq!(r, 0);
58229        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
58230        let e: __mmask8 = 0b11111111;
58231        assert_eq!(r, e);
58232    }
58233
    // 128-bit `testn`: disjoint bits, so the low 4 mask bits are set.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }
58242
58243    #[simd_test(enable = "avx512f,avx512vl")]
58244    const fn test_mm_mask_testn_epi32_mask() {
58245        let a = _mm_set1_epi32(1 << 0);
58246        let b = _mm_set1_epi32(1 << 1);
58247        let r = _mm_mask_test_epi32_mask(0, a, b);
58248        assert_eq!(r, 0);
58249        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
58250        let e: __mmask8 = 0b00001111;
58251        assert_eq!(r, e);
58252    }
58253
    // Non-temporal 512-bit f32 store into 64-byte-aligned memory, followed by
    // an sfence before reading back. Ignored under Miri (no NT-store support).
    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    fn test_mm512_stream_ps() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f32; 16], // 64 bytes
        }
        let a = _mm512_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 16] };

        unsafe {
            _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
        }
        _mm_sfence();
        for i in 0..16 {
            assert_eq!(mem.data[i], get_m512(a, i));
        }
    }

    // Non-temporal 512-bit f64 store; same pattern as the ps variant.
    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    fn test_mm512_stream_pd() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f64; 8],
        }
        let a = _mm512_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 8] };

        unsafe {
            _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
        }
        _mm_sfence();
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m512d(a, i));
        }
    }

    // Non-temporal 512-bit integer store; compared as i64 elements.
    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    fn test_mm512_stream_si512() {
        #[repr(align(64))]
        struct Memory {
            pub data: [i64; 8],
        }
        let a = _mm512_set1_epi32(7);
        let mut mem = Memory { data: [-1; 8] };

        unsafe {
            _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
        }
        _mm_sfence();
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m512i(a, i));
        }
    }

    // Non-temporal aligned load round-trips the vector (a __m512i local is
    // 64-byte aligned, satisfying the alignment requirement).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_stream_load_si512() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = unsafe { _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _) };
        assert_eq_m512i(a, r);
    }
58317
    // Horizontal add of 16 lanes of 1 -> 16.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_reduce_add_epi32(a);
        assert_eq!(16, e);
    }

    // Masked horizontal add: only the 8 selected lanes contribute.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    // Horizontal f32 add: 16 lanes of 1.0 -> 16.0 (exact in f32).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_reduce_add_ps(a);
        assert_eq!(16., e);
    }

    // Masked f32 add over 8 lanes -> 8.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
        assert_eq!(8., e);
    }

    // Horizontal multiply: 2^16 = 65536.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_reduce_mul_epi32(a);
        assert_eq!(65536, e);
    }

    // Masked multiply over 8 lanes: 2^8 = 256.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
        assert_eq!(256, e);
    }

    // Horizontal f32 multiply: 2.0^16 = 65536.0 (exact).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_reduce_mul_ps(a);
        assert_eq!(65536., e);
    }

    // Masked f32 multiply over 8 lanes: 2.0^8 = 256.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
        assert_eq!(256., e);
    }

    // Signed max over 0..=15 -> 15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_max_epi32(a);
        assert_eq!(15, e);
    }

    // Masked signed max: the high mask bits select the lanes holding 0..=7
    // (the first `set_epi32` arguments), so the max is 7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    // Unsigned max over 0..=15 -> 15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_max_epu32(a);
        assert_eq!(15, e);
    }

    // Masked unsigned max over the high lanes (values 0..=7) -> 7.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    // f32 max over 0.0..=15.0 -> 15.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_max_ps(a);
        assert_eq!(15., e);
    }

    // Masked f32 max over the high lanes (values 0.0..=7.0) -> 7.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
        assert_eq!(7., e);
    }

    // Signed min over 0..=15 -> 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_min_epi32(a);
        assert_eq!(0, e);
    }

    // Masked signed min over the high lanes (values 0..=7) -> 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // Unsigned min over 0..=15 -> 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_min_epu32(a);
        assert_eq!(0, e);
    }

    // Masked unsigned min over the high lanes (values 0..=7) -> 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    // f32 min over 0.0..=15.0 -> 0.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_min_ps(a);
        assert_eq!(0., e);
    }

    // Masked f32 min over the high lanes (values 0.0..=7.0) -> 0.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
        assert_eq!(0., e);
    }

    // Horizontal AND: half the lanes are 1 (0b01) and half 2 (0b10), so the
    // intersection is 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_and_epi32(a);
        assert_eq!(0, e);
    }

    // Masked AND over the high lanes only, which all hold 1 -> 1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    // Horizontal OR: union of 1 and 2 across lanes -> 3.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_or_epi32(a);
        assert_eq!(3, e);
    }
58486
58487    #[simd_test(enable = "avx512f")]
58488    const fn test_mm512_mask_reduce_or_epi32() {
58489        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58490        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
58491        assert_eq!(1, e);
58492    }
58493
    // Compress: mask 0 leaves `src` untouched; otherwise mask-selected lanes are
    // packed contiguously into the low lanes and the rest are filled from `src`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compress_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_compress_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
        );
        assert_eq_m512i(r, e);
    }
58506
    // Zeroing compress: selected lanes are packed into the low lanes; the
    // remaining high lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_compress_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_compress_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m512i(r, e);
    }
58516
    // 256-bit variant of the merging compress test (8 i32 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compress_epi32() {
        let src = _mm256_set1_epi32(200);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_compress_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
        assert_eq_m256i(r, e);
    }
58527
    // 256-bit variant of the zeroing compress test (8 i32 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_compress_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_compress_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_compress_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
        assert_eq_m256i(r, e);
    }
58537
    // 128-bit variant of the merging compress test (4 i32 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compress_epi32() {
        let src = _mm_set1_epi32(200);
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_mask_compress_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
        let e = _mm_set_epi32(200, 200, 1, 3);
        assert_eq_m128i(r, e);
    }
58548
    // 128-bit variant of the zeroing compress test (4 i32 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_compress_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_maskz_compress_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_compress_epi32(0b00000101, a);
        let e = _mm_set_epi32(0, 0, 1, 3);
        assert_eq_m128i(r, e);
    }
58558
    // f32 merging compress: selected lanes packed low, remaining lanes from `src`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compress_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_compress_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }
58573
    // f32 zeroing compress: selected lanes packed low, remaining lanes zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_compress_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_compress_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }
58587
    // 256-bit f32 merging compress (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compress_ps() {
        let src = _mm256_set1_ps(200.);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_compress_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }
58598
    // 256-bit f32 zeroing compress (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_compress_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_compress_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_compress_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }
58608
    // 128-bit f32 merging compress (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compress_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_compress_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_compress_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 200., 1., 3.);
        assert_eq_m128(r, e);
    }
58619
    // 128-bit f32 zeroing compress (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_compress_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_compress_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_compress_ps(0b00000101, a);
        let e = _mm_set_ps(0., 0., 1., 3.);
        assert_eq_m128(r, e);
    }
58629
    // Compress-store: only mask-selected lanes are written to memory, packed
    // contiguously; a zero mask stores nothing (buffer stays all zeros).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compressstoreu_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let mut r = [0_i32; 16];
        unsafe {
            _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_i32; 16]);
        unsafe {
            _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
        }
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
58643
    // 256-bit i32 compress-store: selected lanes written contiguously; zero mask
    // leaves the buffer untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compressstoreu_epi32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = [0_i32; 8];
        unsafe {
            _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_i32; 8]);
        unsafe {
            _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
        }
        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
    }
58657
    // 128-bit i32 compress-store: selected lanes written contiguously; zero mask
    // leaves the buffer untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compressstoreu_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = [0_i32; 4];
        unsafe {
            _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_i32; 4]);
        unsafe {
            _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
        }
        assert_eq!(&r, &[1, 2, 4, 0]);
    }
58671
    // 512-bit i64 compress-store (8 lanes): selected lanes written contiguously.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compressstoreu_epi64() {
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = [0_i64; 8];
        unsafe {
            _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_i64; 8]);
        unsafe {
            _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
        }
        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
    }
58685
    // 256-bit i64 compress-store (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compressstoreu_epi64() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let mut r = [0_i64; 4];
        unsafe {
            _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_i64; 4]);
        unsafe {
            _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
        }
        assert_eq!(&r, &[1, 2, 4, 0]);
    }
58699
    // 128-bit i64 compress-store (2 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compressstoreu_epi64() {
        let a = _mm_setr_epi64x(1, 2);
        let mut r = [0_i64; 2];
        unsafe {
            _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_i64; 2]);
        unsafe {
            _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
        }
        assert_eq!(&r, &[2, 0]);
    }
58713
    // 512-bit f32 compress-store (16 lanes): selected lanes written contiguously.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compressstoreu_ps() {
        let a = _mm512_setr_ps(
            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
            13_f32, 14_f32, 15_f32, 16_f32,
        );
        let mut r = [0_f32; 16];
        unsafe {
            _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_f32; 16]);
        unsafe {
            _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
        }
        assert_eq!(
            &r,
            &[
                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
            ]
        );
    }
58736
    // 256-bit f32 compress-store (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compressstoreu_ps() {
        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
        let mut r = [0_f32; 8];
        unsafe {
            _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0_f32; 8]);
        unsafe {
            _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
        }
        assert_eq!(
            &r,
            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
        );
    }
58753
    // 128-bit f32 compress-store (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compressstoreu_ps() {
        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
        let mut r = [0.; 4];
        unsafe {
            _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0.; 4]);
        unsafe {
            _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
        }
        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
    }
58767
    // 512-bit f64 compress-store (8 lanes).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compressstoreu_pd() {
        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut r = [0.; 8];
        unsafe {
            _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0.; 8]);
        unsafe {
            _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
        }
        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
    }
58781
    // 256-bit f64 compress-store (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compressstoreu_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut r = [0.; 4];
        unsafe {
            _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0.; 4]);
        unsafe {
            _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
        }
        assert_eq!(&r, &[1., 2., 4., 0.]);
    }
58795
    // 128-bit f64 compress-store (2 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compressstoreu_pd() {
        let a = _mm_setr_pd(1., 2.);
        let mut r = [0.; 2];
        unsafe {
            _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        }
        assert_eq!(&r, &[0.; 2]);
        unsafe {
            _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
        }
        assert_eq!(&r, &[2., 0.]);
    }
58809
    // Expand (inverse of compress): low lanes of `a` are distributed into the
    // mask-selected positions; unselected positions come from `src`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expand_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_expand_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
        );
        assert_eq_m512i(r, e);
    }
58822
    // Zeroing expand: low lanes of `a` fill the mask-selected positions;
    // unselected positions are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expand_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_expand_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
        assert_eq_m512i(r, e);
    }
58832
    // 256-bit i32 merging expand (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expand_epi32() {
        let src = _mm256_set1_epi32(200);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_expand_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
        assert_eq_m256i(r, e);
    }
58843
    // 256-bit i32 zeroing expand (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expand_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_expand_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_expand_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
        assert_eq_m256i(r, e);
    }
58853
    // 128-bit i32 merging expand (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expand_epi32() {
        let src = _mm_set1_epi32(200);
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_mask_expand_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
        let e = _mm_set_epi32(200, 2, 200, 3);
        assert_eq_m128i(r, e);
    }
58864
    // 128-bit i32 zeroing expand (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expand_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_maskz_expand_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_expand_epi32(0b00000101, a);
        let e = _mm_set_epi32(0, 2, 0, 3);
        assert_eq_m128i(r, e);
    }
58874
    // f32 merging expand: low lanes of `a` fill mask-selected positions,
    // remaining positions taken from `src`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expand_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_expand_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
        );
        assert_eq_m512(r, e);
    }
58889
    // f32 zeroing expand: low lanes of `a` fill mask-selected positions,
    // remaining positions zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expand_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_expand_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
        );
        assert_eq_m512(r, e);
    }
58903
    // 256-bit f32 merging expand (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expand_ps() {
        let src = _mm256_set1_ps(200.);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_expand_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
        assert_eq_m256(r, e);
    }
58914
    // 256-bit f32 zeroing expand (8 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expand_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_expand_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_expand_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
        assert_eq_m256(r, e);
    }
58924
    // 128-bit f32 merging expand (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expand_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_expand_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_expand_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 2., 200., 3.);
        assert_eq_m128(r, e);
    }
58935
    // 128-bit f32 zeroing expand (4 lanes).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expand_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_expand_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_expand_ps(0b00000101, a);
        let e = _mm_set_ps(0., 2., 0., 3.);
        assert_eq_m128(r, e);
    }
58945
    // Unaligned 512-bit load of 16 i32; black_box prevents the load from being
    // const-folded away.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_epi32(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
58954
    // Unaligned 256-bit load of 8 i32.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
        let p = a.as_ptr();
        let r = unsafe { _mm256_loadu_epi32(black_box(p)) };
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }
58963
    // Unaligned 128-bit load of 4 i32.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_loadu_epi32() {
        let a = &[4, 3, 2, 5];
        let p = a.as_ptr();
        let r = unsafe { _mm_loadu_epi32(black_box(p)) };
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }
58972
    // Truncating narrowing store: 16 x i32 -> 16 x i16 written through a raw
    // pointer; the all-ones mask writes every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
        }
        let e = _mm256_set1_epi16(9);
        assert_eq_m256i(r, e);
    }
58983
    // Truncating narrowing store: 8 x i32 -> 8 x i16, all lanes selected.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(9);
        assert_eq_m128i(r, e);
    }
58994
    // Truncating narrowing store: 4 x i32 -> 4 x i16 written to the low half of
    // a zeroed 128-bit destination; the upper 4 i16 stay 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_storeu_epi16() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59005
    // Signed saturating narrowing store: i32::MAX saturates to i16::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtsepi32_storeu_epi16(
                &mut r as *mut _ as *mut i16,
                0b11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi16(i16::MAX);
        assert_eq_m256i(r, e);
    }
59020
    // Signed saturating narrowing store (8 lanes): i32::MAX -> i16::MAX.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(i16::MAX);
        assert_eq_m128i(r, e);
    }
59031
    // Signed saturating narrowing store (4 lanes) into a zeroed buffer; the
    // upper half of the 128-bit destination remains 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
59042
    // Unsigned saturating narrowing store: i32::MAX saturates to u16::MAX
    // (reinterpreted as i16 for the comparison).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtusepi32_storeu_epi16(
                &mut r as *mut _ as *mut i16,
                0b11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi16(u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
59057
    // Unsigned saturating narrowing store (8 lanes): i32::MAX -> u16::MAX.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
59068
    // Unsigned saturating narrowing store (4 lanes) into a zeroed buffer; upper
    // half of the destination remains 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(
            0,
            0,
            0,
            0,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
        );
        assert_eq_m128i(r, e);
    }
59088
    // Truncating narrowing store: 16 x i32 -> 16 x i8, all lanes selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(9);
        assert_eq_m128i(r, e);
    }
59099
    // Truncating narrowing store: 8 x i32 -> 8 x i8 into the low half of a
    // zeroed 128-bit buffer; the upper 8 bytes stay 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59110
    // Truncating narrowing store: 4 x i32 -> 4 x i8; only the low 4 bytes of the
    // zeroed buffer are written.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_storeu_epi8() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59121
    // Signed saturating narrowing store: i32::MAX saturates to i8::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }
59132
    // Signed saturating narrowing store (8 lanes): i32::MAX -> i8::MAX; upper
    // 8 bytes of the zeroed buffer stay 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
59149
    // Signed saturating narrowing store (4 lanes): i32::MAX -> i8::MAX; only the
    // low 4 bytes are written.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
59166
    // Unsigned saturating narrowing store: i32::MAX saturates to u8::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
59177
    // Unsigned saturating narrowing store (8 lanes): i32::MAX -> u8::MAX; upper
    // 8 bytes of the zeroed buffer stay 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
59194
    // Unsigned saturating narrowing store (4 lanes): i32::MAX -> u8::MAX; only
    // the low 4 bytes are written.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
59211
    // Unaligned 512-bit store of 16 i32 through a raw pointer; destination
    // starts undefined and must equal `a` afterwards.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m512i(r, a);
    }
59221
    // Unaligned 256-bit store of 8 i32.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_storeu_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m256i(r, a);
    }
59231
    // Unaligned 128-bit store of 4 i32.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_storeu_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m128i(r, a);
    }
59241
    // Unaligned whole-register (512-bit) load via the untyped si512 entry point.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_si512() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr().cast();
        let r = unsafe { _mm512_loadu_si512(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59250
    // Unaligned whole-register (512-bit) store via the untyped si512 entry point.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_si512(&mut r as *mut _, a);
        }
        assert_eq_m512i(r, a);
    }
59260
    // Aligned 512-bit load: the wrapper struct guarantees the 64-byte alignment
    // the intrinsic requires.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_si512() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr().cast();
        let r = unsafe { _mm512_load_si512(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59275
    // Aligned 512-bit store to a __m512i destination (inherently 64-byte aligned).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_store_si512(&mut r as *mut _, a);
        }
        assert_eq_m512i(r, a);
    }
59285
    // Aligned 512-bit load of 16 i32; the wrapper struct provides the required
    // 64-byte alignment.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm512_load_epi32(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59300
    // 256-bit variant of the aligned epi32 load (8 lanes); needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 8],
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm256_load_epi32(black_box(p)) };
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }
59315
    // 128-bit variant of the aligned epi32 load (4 lanes); needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 4],
        }
        let a = Align { data: [4, 3, 2, 5] };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm_load_epi32(black_box(p)) };
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }
59328
    // Typed aligned epi32 store (takes *mut i32) must preserve every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m512i(r, a);
    }
59338
    // 256-bit variant of the aligned epi32 store; needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_store_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m256i(r, a);
    }
59348
    // 128-bit variant of the aligned epi32 store; needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_store_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m128i(r, a);
    }
59358
    // Aligned f32x16 load; the wrapper guarantees the 64-byte alignment required.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
            ],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm512_load_ps(black_box(p)) };
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }
59377
    // Aligned f32x16 store into a stack __m512 must preserve every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        unsafe {
            _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
        }
        assert_eq_m512(r, a);
    }
59387
    // Writemask broadcast: mask 0 keeps src untouched; all-ones mask writes 11 everywhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_set1_epi32() {
        let src = _mm512_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm512_mask_set1_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
59398
    // Zeromask broadcast: mask 0 zeroes every lane; all-ones mask writes 11 everywhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm512_maskz_set1_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
59408
    // 256-bit writemask broadcast (8 lanes, so an 8-bit mask); needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_set1_epi32() {
        let src = _mm256_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm256_mask_set1_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }
59419
    // 256-bit zeromask broadcast; needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm256_maskz_set1_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }
59429
    // 128-bit writemask broadcast (4 lanes, so only the low 4 mask bits matter).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_set1_epi32() {
        let src = _mm_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm_mask_set1_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }
59440
    // 128-bit zeromask broadcast; needs avx512vl.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm_maskz_set1_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi32(0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }
59450
    // Masked scalar move: upper 3 lanes always come from `a`; lane 0 comes from
    // `b` when the mask bit is set, otherwise from `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_move_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.); // lane 0 from src
        assert_eq_m128(r, e);
        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.); // lane 0 from b
        assert_eq_m128(r, e);
    }
59463
    // Zero-masked scalar move: lane 0 is zeroed when the mask bit is clear.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_move_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.); // lane 0 zeroed
        assert_eq_m128(r, e);
        let r = _mm_maskz_move_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.); // lane 0 from b
        assert_eq_m128(r, e);
    }
59475
    // Masked scalar f64 move: upper lane always from `a`; lane 0 from `b` or `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_move_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.); // lane 0 from src
        assert_eq_m128d(r, e);
        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 4.); // lane 0 from b
        assert_eq_m128d(r, e);
    }
59488
    // Zero-masked scalar f64 move: lane 0 is zeroed when the mask bit is clear.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_move_sd(0, a, b);
        let e = _mm_set_pd(1., 0.); // lane 0 zeroed
        assert_eq_m128d(r, e);
        let r = _mm_maskz_move_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 4.); // lane 0 from b
        assert_eq_m128d(r, e);
    }
59500
    // Masked scalar add: lane 0 is a0+b0 (20+40=60) when masked in, else src's 110.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_add_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }
59513
    // Zero-masked scalar add: lane 0 is 20+40=60 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_add_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_add_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_add_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }
59525
    // Masked scalar f64 add: lane 0 is 2+4=6 when masked in, else src's 11.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_add_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_add_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }
59538
    // Zero-masked scalar f64 add: lane 0 is 2+4=6 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_add_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_add_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }
59550
    // Masked scalar sub: lane 0 is 20-40=-20 when masked in, else src's 110.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }
59563
    // Zero-masked scalar sub: lane 0 is 20-40=-20 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sub_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }
59575
    // Masked scalar f64 sub: lane 0 is 2-4=-2 when masked in, else src's 11.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }
59588
    // Zero-masked scalar f64 sub: lane 0 is 2-4=-2 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sub_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }
59600
    // Masked scalar mul: lane 0 is 20*40=800 when masked in, else src's 110.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }
59613
    // Zero-masked scalar mul: lane 0 is 20*40=800 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_mul_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_mul_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }
59625
    // Masked scalar f64 mul: lane 0 is 2*4=8 when masked in, else src's 11.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
59638
    // Zero-masked scalar f64 mul: lane 0 is 2*4=8 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_mul_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_mul_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_mul_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
59650
    // Masked scalar div: lane 0 is 20/40=0.5 when masked in, else src's 110.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59663
    // Zero-masked scalar div: lane 0 is 20/40=0.5 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_div_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_div_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59675
    // Masked scalar f64 div: lane 0 is 2/4=0.5 when masked in, else src's 11.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
59688
    // Zero-masked scalar f64 div: lane 0 is 2/4=0.5 when masked in, else 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_div_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_div_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
59700
59701    #[simd_test(enable = "avx512f")]
59702    fn test_mm_mask_max_ss() {
59703        let a = _mm_set_ps(0., 1., 2., 3.);
59704        let b = _mm_set_ps(4., 5., 6., 7.);
59705        let r = _mm_mask_max_ss(a, 0, a, b);
59706        let e = _mm_set_ps(0., 1., 2., 3.);
59707        assert_eq_m128(r, e);
59708        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
59709        let e = _mm_set_ps(0., 1., 2., 7.);
59710        assert_eq_m128(r, e);
59711    }
59712
59713    #[simd_test(enable = "avx512f")]
59714    fn test_mm_maskz_max_ss() {
59715        let a = _mm_set_ps(0., 1., 2., 3.);
59716        let b = _mm_set_ps(4., 5., 6., 7.);
59717        let r = _mm_maskz_max_ss(0, a, b);
59718        let e = _mm_set_ps(0., 1., 2., 0.);
59719        assert_eq_m128(r, e);
59720        let r = _mm_maskz_max_ss(0b11111111, a, b);
59721        let e = _mm_set_ps(0., 1., 2., 7.);
59722        assert_eq_m128(r, e);
59723    }
59724
59725    #[simd_test(enable = "avx512f")]
59726    fn test_mm_mask_max_sd() {
59727        let a = _mm_set_pd(0., 1.);
59728        let b = _mm_set_pd(2., 3.);
59729        let r = _mm_mask_max_sd(a, 0, a, b);
59730        let e = _mm_set_pd(0., 1.);
59731        assert_eq_m128d(r, e);
59732        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
59733        let e = _mm_set_pd(0., 3.);
59734        assert_eq_m128d(r, e);
59735    }
59736
59737    #[simd_test(enable = "avx512f")]
59738    fn test_mm_maskz_max_sd() {
59739        let a = _mm_set_pd(0., 1.);
59740        let b = _mm_set_pd(2., 3.);
59741        let r = _mm_maskz_max_sd(0, a, b);
59742        let e = _mm_set_pd(0., 0.);
59743        assert_eq_m128d(r, e);
59744        let r = _mm_maskz_max_sd(0b11111111, a, b);
59745        let e = _mm_set_pd(0., 3.);
59746        assert_eq_m128d(r, e);
59747    }
59748
59749    #[simd_test(enable = "avx512f")]
59750    fn test_mm_mask_min_ss() {
59751        let a = _mm_set_ps(0., 1., 2., 3.);
59752        let b = _mm_set_ps(4., 5., 6., 7.);
59753        let r = _mm_mask_min_ss(a, 0, a, b);
59754        let e = _mm_set_ps(0., 1., 2., 3.);
59755        assert_eq_m128(r, e);
59756        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
59757        let e = _mm_set_ps(0., 1., 2., 3.);
59758        assert_eq_m128(r, e);
59759    }
59760
59761    #[simd_test(enable = "avx512f")]
59762    fn test_mm_maskz_min_ss() {
59763        let a = _mm_set_ps(0., 1., 2., 3.);
59764        let b = _mm_set_ps(4., 5., 6., 7.);
59765        let r = _mm_maskz_min_ss(0, a, b);
59766        let e = _mm_set_ps(0., 1., 2., 0.);
59767        assert_eq_m128(r, e);
59768        let r = _mm_maskz_min_ss(0b11111111, a, b);
59769        let e = _mm_set_ps(0., 1., 2., 3.);
59770        assert_eq_m128(r, e);
59771    }
59772
59773    #[simd_test(enable = "avx512f")]
59774    fn test_mm_mask_min_sd() {
59775        let a = _mm_set_pd(0., 1.);
59776        let b = _mm_set_pd(2., 3.);
59777        let r = _mm_mask_min_sd(a, 0, a, b);
59778        let e = _mm_set_pd(0., 1.);
59779        assert_eq_m128d(r, e);
59780        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
59781        let e = _mm_set_pd(0., 1.);
59782        assert_eq_m128d(r, e);
59783    }
59784
59785    #[simd_test(enable = "avx512f")]
59786    fn test_mm_maskz_min_sd() {
59787        let a = _mm_set_pd(0., 1.);
59788        let b = _mm_set_pd(2., 3.);
59789        let r = _mm_maskz_min_sd(0, a, b);
59790        let e = _mm_set_pd(0., 0.);
59791        assert_eq_m128d(r, e);
59792        let r = _mm_maskz_min_sd(0b11111111, a, b);
59793        let e = _mm_set_pd(0., 1.);
59794        assert_eq_m128d(r, e);
59795    }
59796
59797    #[simd_test(enable = "avx512f")]
59798    fn test_mm_mask_sqrt_ss() {
59799        let src = _mm_set_ps(10., 11., 100., 110.);
59800        let a = _mm_set_ps(1., 2., 10., 20.);
59801        let b = _mm_set_ps(3., 4., 30., 4.);
59802        let r = _mm_mask_sqrt_ss(src, 0, a, b);
59803        let e = _mm_set_ps(1., 2., 10., 110.);
59804        assert_eq_m128(r, e);
59805        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
59806        let e = _mm_set_ps(1., 2., 10., 2.);
59807        assert_eq_m128(r, e);
59808    }
59809
59810    #[simd_test(enable = "avx512f")]
59811    fn test_mm_maskz_sqrt_ss() {
59812        let a = _mm_set_ps(1., 2., 10., 20.);
59813        let b = _mm_set_ps(3., 4., 30., 4.);
59814        let r = _mm_maskz_sqrt_ss(0, a, b);
59815        let e = _mm_set_ps(1., 2., 10., 0.);
59816        assert_eq_m128(r, e);
59817        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
59818        let e = _mm_set_ps(1., 2., 10., 2.);
59819        assert_eq_m128(r, e);
59820    }
59821
59822    #[simd_test(enable = "avx512f")]
59823    fn test_mm_mask_sqrt_sd() {
59824        let src = _mm_set_pd(10., 11.);
59825        let a = _mm_set_pd(1., 2.);
59826        let b = _mm_set_pd(3., 4.);
59827        let r = _mm_mask_sqrt_sd(src, 0, a, b);
59828        let e = _mm_set_pd(1., 11.);
59829        assert_eq_m128d(r, e);
59830        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
59831        let e = _mm_set_pd(1., 2.);
59832        assert_eq_m128d(r, e);
59833    }
59834
59835    #[simd_test(enable = "avx512f")]
59836    fn test_mm_maskz_sqrt_sd() {
59837        let a = _mm_set_pd(1., 2.);
59838        let b = _mm_set_pd(3., 4.);
59839        let r = _mm_maskz_sqrt_sd(0, a, b);
59840        let e = _mm_set_pd(1., 0.);
59841        assert_eq_m128d(r, e);
59842        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
59843        let e = _mm_set_pd(1., 2.);
59844        assert_eq_m128d(r, e);
59845    }
59846
59847    #[simd_test(enable = "avx512f")]
59848    fn test_mm_rsqrt14_ss() {
59849        let a = _mm_set_ps(1., 2., 10., 20.);
59850        let b = _mm_set_ps(3., 4., 30., 4.);
59851        let r = _mm_rsqrt14_ss(a, b);
59852        let e = _mm_set_ps(1., 2., 10., 0.5);
59853        assert_eq_m128(r, e);
59854    }
59855
59856    #[simd_test(enable = "avx512f")]
59857    fn test_mm_mask_rsqrt14_ss() {
59858        let src = _mm_set_ps(10., 11., 100., 110.);
59859        let a = _mm_set_ps(1., 2., 10., 20.);
59860        let b = _mm_set_ps(3., 4., 30., 4.);
59861        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
59862        let e = _mm_set_ps(1., 2., 10., 110.);
59863        assert_eq_m128(r, e);
59864        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
59865        let e = _mm_set_ps(1., 2., 10., 0.5);
59866        assert_eq_m128(r, e);
59867    }
59868
59869    #[simd_test(enable = "avx512f")]
59870    fn test_mm_maskz_rsqrt14_ss() {
59871        let a = _mm_set_ps(1., 2., 10., 20.);
59872        let b = _mm_set_ps(3., 4., 30., 4.);
59873        let r = _mm_maskz_rsqrt14_ss(0, a, b);
59874        let e = _mm_set_ps(1., 2., 10., 0.);
59875        assert_eq_m128(r, e);
59876        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
59877        let e = _mm_set_ps(1., 2., 10., 0.5);
59878        assert_eq_m128(r, e);
59879    }
59880
59881    #[simd_test(enable = "avx512f")]
59882    fn test_mm_rsqrt14_sd() {
59883        let a = _mm_set_pd(1., 2.);
59884        let b = _mm_set_pd(3., 4.);
59885        let r = _mm_rsqrt14_sd(a, b);
59886        let e = _mm_set_pd(1., 0.5);
59887        assert_eq_m128d(r, e);
59888    }
59889
59890    #[simd_test(enable = "avx512f")]
59891    fn test_mm_mask_rsqrt14_sd() {
59892        let src = _mm_set_pd(10., 11.);
59893        let a = _mm_set_pd(1., 2.);
59894        let b = _mm_set_pd(3., 4.);
59895        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
59896        let e = _mm_set_pd(1., 11.);
59897        assert_eq_m128d(r, e);
59898        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
59899        let e = _mm_set_pd(1., 0.5);
59900        assert_eq_m128d(r, e);
59901    }
59902
59903    #[simd_test(enable = "avx512f")]
59904    fn test_mm_maskz_rsqrt14_sd() {
59905        let a = _mm_set_pd(1., 2.);
59906        let b = _mm_set_pd(3., 4.);
59907        let r = _mm_maskz_rsqrt14_sd(0, a, b);
59908        let e = _mm_set_pd(1., 0.);
59909        assert_eq_m128d(r, e);
59910        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
59911        let e = _mm_set_pd(1., 0.5);
59912        assert_eq_m128d(r, e);
59913    }
59914
59915    #[simd_test(enable = "avx512f")]
59916    fn test_mm_rcp14_ss() {
59917        let a = _mm_set_ps(1., 2., 10., 20.);
59918        let b = _mm_set_ps(3., 4., 30., 4.);
59919        let r = _mm_rcp14_ss(a, b);
59920        let e = _mm_set_ps(1., 2., 10., 0.25);
59921        assert_eq_m128(r, e);
59922    }
59923
59924    #[simd_test(enable = "avx512f")]
59925    fn test_mm_mask_rcp14_ss() {
59926        let src = _mm_set_ps(10., 11., 100., 110.);
59927        let a = _mm_set_ps(1., 2., 10., 20.);
59928        let b = _mm_set_ps(3., 4., 30., 4.);
59929        let r = _mm_mask_rcp14_ss(src, 0, a, b);
59930        let e = _mm_set_ps(1., 2., 10., 110.);
59931        assert_eq_m128(r, e);
59932        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
59933        let e = _mm_set_ps(1., 2., 10., 0.25);
59934        assert_eq_m128(r, e);
59935    }
59936
59937    #[simd_test(enable = "avx512f")]
59938    fn test_mm_maskz_rcp14_ss() {
59939        let a = _mm_set_ps(1., 2., 10., 20.);
59940        let b = _mm_set_ps(3., 4., 30., 4.);
59941        let r = _mm_maskz_rcp14_ss(0, a, b);
59942        let e = _mm_set_ps(1., 2., 10., 0.);
59943        assert_eq_m128(r, e);
59944        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
59945        let e = _mm_set_ps(1., 2., 10., 0.25);
59946        assert_eq_m128(r, e);
59947    }
59948
59949    #[simd_test(enable = "avx512f")]
59950    fn test_mm_rcp14_sd() {
59951        let a = _mm_set_pd(1., 2.);
59952        let b = _mm_set_pd(3., 4.);
59953        let r = _mm_rcp14_sd(a, b);
59954        let e = _mm_set_pd(1., 0.25);
59955        assert_eq_m128d(r, e);
59956    }
59957
59958    #[simd_test(enable = "avx512f")]
59959    fn test_mm_mask_rcp14_sd() {
59960        let src = _mm_set_pd(10., 11.);
59961        let a = _mm_set_pd(1., 2.);
59962        let b = _mm_set_pd(3., 4.);
59963        let r = _mm_mask_rcp14_sd(src, 0, a, b);
59964        let e = _mm_set_pd(1., 11.);
59965        assert_eq_m128d(r, e);
59966        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
59967        let e = _mm_set_pd(1., 0.25);
59968        assert_eq_m128d(r, e);
59969    }
59970
59971    #[simd_test(enable = "avx512f")]
59972    fn test_mm_maskz_rcp14_sd() {
59973        let a = _mm_set_pd(1., 2.);
59974        let b = _mm_set_pd(3., 4.);
59975        let r = _mm_maskz_rcp14_sd(0, a, b);
59976        let e = _mm_set_pd(1., 0.);
59977        assert_eq_m128d(r, e);
59978        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
59979        let e = _mm_set_pd(1., 0.25);
59980        assert_eq_m128d(r, e);
59981    }
59982
59983    #[simd_test(enable = "avx512f")]
59984    fn test_mm_getexp_ss() {
59985        let a = _mm_set1_ps(2.);
59986        let b = _mm_set1_ps(3.);
59987        let r = _mm_getexp_ss(a, b);
59988        let e = _mm_set_ps(2., 2., 2., 1.);
59989        assert_eq_m128(r, e);
59990    }
59991
59992    #[simd_test(enable = "avx512f")]
59993    fn test_mm_mask_getexp_ss() {
59994        let a = _mm_set1_ps(2.);
59995        let b = _mm_set1_ps(3.);
59996        let r = _mm_mask_getexp_ss(a, 0, a, b);
59997        let e = _mm_set_ps(2., 2., 2., 2.);
59998        assert_eq_m128(r, e);
59999        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
60000        let e = _mm_set_ps(2., 2., 2., 1.);
60001        assert_eq_m128(r, e);
60002    }
60003
60004    #[simd_test(enable = "avx512f")]
60005    fn test_mm_maskz_getexp_ss() {
60006        let a = _mm_set1_ps(2.);
60007        let b = _mm_set1_ps(3.);
60008        let r = _mm_maskz_getexp_ss(0, a, b);
60009        let e = _mm_set_ps(2., 2., 2., 0.);
60010        assert_eq_m128(r, e);
60011        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
60012        let e = _mm_set_ps(2., 2., 2., 1.);
60013        assert_eq_m128(r, e);
60014    }
60015
60016    #[simd_test(enable = "avx512f")]
60017    fn test_mm_getexp_sd() {
60018        let a = _mm_set1_pd(2.);
60019        let b = _mm_set1_pd(3.);
60020        let r = _mm_getexp_sd(a, b);
60021        let e = _mm_set_pd(2., 1.);
60022        assert_eq_m128d(r, e);
60023    }
60024
60025    #[simd_test(enable = "avx512f")]
60026    fn test_mm_mask_getexp_sd() {
60027        let a = _mm_set1_pd(2.);
60028        let b = _mm_set1_pd(3.);
60029        let r = _mm_mask_getexp_sd(a, 0, a, b);
60030        let e = _mm_set_pd(2., 2.);
60031        assert_eq_m128d(r, e);
60032        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
60033        let e = _mm_set_pd(2., 1.);
60034        assert_eq_m128d(r, e);
60035    }
60036
60037    #[simd_test(enable = "avx512f")]
60038    fn test_mm_maskz_getexp_sd() {
60039        let a = _mm_set1_pd(2.);
60040        let b = _mm_set1_pd(3.);
60041        let r = _mm_maskz_getexp_sd(0, a, b);
60042        let e = _mm_set_pd(2., 0.);
60043        assert_eq_m128d(r, e);
60044        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
60045        let e = _mm_set_pd(2., 1.);
60046        assert_eq_m128d(r, e);
60047    }
60048
60049    #[simd_test(enable = "avx512f")]
60050    fn test_mm_getmant_ss() {
60051        let a = _mm_set1_ps(20.);
60052        let b = _mm_set1_ps(10.);
60053        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
60054        let e = _mm_set_ps(20., 20., 20., 1.25);
60055        assert_eq_m128(r, e);
60056    }
60057
60058    #[simd_test(enable = "avx512f")]
60059    fn test_mm_mask_getmant_ss() {
60060        let a = _mm_set1_ps(20.);
60061        let b = _mm_set1_ps(10.);
60062        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
60063        let e = _mm_set_ps(20., 20., 20., 20.);
60064        assert_eq_m128(r, e);
60065        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
60066        let e = _mm_set_ps(20., 20., 20., 1.25);
60067        assert_eq_m128(r, e);
60068    }
60069
60070    #[simd_test(enable = "avx512f")]
60071    fn test_mm_maskz_getmant_ss() {
60072        let a = _mm_set1_ps(20.);
60073        let b = _mm_set1_ps(10.);
60074        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
60075        let e = _mm_set_ps(20., 20., 20., 0.);
60076        assert_eq_m128(r, e);
60077        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
60078        let e = _mm_set_ps(20., 20., 20., 1.25);
60079        assert_eq_m128(r, e);
60080    }
60081
60082    #[simd_test(enable = "avx512f")]
60083    fn test_mm_getmant_sd() {
60084        let a = _mm_set1_pd(20.);
60085        let b = _mm_set1_pd(10.);
60086        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
60087        let e = _mm_set_pd(20., 1.25);
60088        assert_eq_m128d(r, e);
60089    }
60090
60091    #[simd_test(enable = "avx512f")]
60092    fn test_mm_mask_getmant_sd() {
60093        let a = _mm_set1_pd(20.);
60094        let b = _mm_set1_pd(10.);
60095        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
60096        let e = _mm_set_pd(20., 20.);
60097        assert_eq_m128d(r, e);
60098        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
60099        let e = _mm_set_pd(20., 1.25);
60100        assert_eq_m128d(r, e);
60101    }
60102
60103    #[simd_test(enable = "avx512f")]
60104    fn test_mm_maskz_getmant_sd() {
60105        let a = _mm_set1_pd(20.);
60106        let b = _mm_set1_pd(10.);
60107        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
60108        let e = _mm_set_pd(20., 0.);
60109        assert_eq_m128d(r, e);
60110        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
60111        let e = _mm_set_pd(20., 1.25);
60112        assert_eq_m128d(r, e);
60113    }
60114
60115    #[simd_test(enable = "avx512f")]
60116    fn test_mm_roundscale_ss() {
60117        let a = _mm_set1_ps(2.2);
60118        let b = _mm_set1_ps(1.1);
60119        let r = _mm_roundscale_ss::<0>(a, b);
60120        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60121        assert_eq_m128(r, e);
60122    }
60123
60124    #[simd_test(enable = "avx512f")]
60125    fn test_mm_mask_roundscale_ss() {
60126        let a = _mm_set1_ps(2.2);
60127        let b = _mm_set1_ps(1.1);
60128        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
60129        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
60130        assert_eq_m128(r, e);
60131        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
60132        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60133        assert_eq_m128(r, e);
60134    }
60135
60136    #[simd_test(enable = "avx512f")]
60137    fn test_mm_maskz_roundscale_ss() {
60138        let a = _mm_set1_ps(2.2);
60139        let b = _mm_set1_ps(1.1);
60140        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
60141        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
60142        assert_eq_m128(r, e);
60143        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
60144        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60145        assert_eq_m128(r, e);
60146    }
60147
60148    #[simd_test(enable = "avx512f")]
60149    fn test_mm_roundscale_sd() {
60150        let a = _mm_set1_pd(2.2);
60151        let b = _mm_set1_pd(1.1);
60152        let r = _mm_roundscale_sd::<0>(a, b);
60153        let e = _mm_set_pd(2.2, 1.0);
60154        assert_eq_m128d(r, e);
60155    }
60156
60157    #[simd_test(enable = "avx512f")]
60158    fn test_mm_mask_roundscale_sd() {
60159        let a = _mm_set1_pd(2.2);
60160        let b = _mm_set1_pd(1.1);
60161        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
60162        let e = _mm_set_pd(2.2, 2.2);
60163        assert_eq_m128d(r, e);
60164        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
60165        let e = _mm_set_pd(2.2, 1.0);
60166        assert_eq_m128d(r, e);
60167    }
60168
60169    #[simd_test(enable = "avx512f")]
60170    fn test_mm_maskz_roundscale_sd() {
60171        let a = _mm_set1_pd(2.2);
60172        let b = _mm_set1_pd(1.1);
60173        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
60174        let e = _mm_set_pd(2.2, 0.0);
60175        assert_eq_m128d(r, e);
60176        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
60177        let e = _mm_set_pd(2.2, 1.0);
60178        assert_eq_m128d(r, e);
60179    }
60180
60181    #[simd_test(enable = "avx512f")]
60182    fn test_mm_scalef_ss() {
60183        let a = _mm_set1_ps(1.);
60184        let b = _mm_set1_ps(3.);
60185        let r = _mm_scalef_ss(a, b);
60186        let e = _mm_set_ps(1., 1., 1., 8.);
60187        assert_eq_m128(r, e);
60188    }
60189
60190    #[simd_test(enable = "avx512f")]
60191    fn test_mm_mask_scalef_ss() {
60192        let a = _mm_set1_ps(1.);
60193        let b = _mm_set1_ps(3.);
60194        let r = _mm_mask_scalef_ss(a, 0, a, b);
60195        let e = _mm_set_ps(1., 1., 1., 1.);
60196        assert_eq_m128(r, e);
60197        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
60198        let e = _mm_set_ps(1., 1., 1., 8.);
60199        assert_eq_m128(r, e);
60200    }
60201
60202    #[simd_test(enable = "avx512f")]
60203    fn test_mm_maskz_scalef_ss() {
60204        let a = _mm_set1_ps(1.);
60205        let b = _mm_set1_ps(3.);
60206        let r = _mm_maskz_scalef_ss(0, a, b);
60207        let e = _mm_set_ps(1., 1., 1., 0.);
60208        assert_eq_m128(r, e);
60209        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
60210        let e = _mm_set_ps(1., 1., 1., 8.);
60211        assert_eq_m128(r, e);
60212    }
60213
60214    #[simd_test(enable = "avx512f")]
60215    fn test_mm_scalef_sd() {
60216        let a = _mm_set1_pd(1.);
60217        let b = _mm_set1_pd(3.);
60218        let r = _mm_scalef_sd(a, b);
60219        let e = _mm_set_pd(1., 8.);
60220        assert_eq_m128d(r, e);
60221    }
60222
60223    #[simd_test(enable = "avx512f")]
60224    fn test_mm_mask_scalef_sd() {
60225        let a = _mm_set1_pd(1.);
60226        let b = _mm_set1_pd(3.);
60227        let r = _mm_mask_scalef_sd(a, 0, a, b);
60228        let e = _mm_set_pd(1., 1.);
60229        assert_eq_m128d(r, e);
60230        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
60231        let e = _mm_set_pd(1., 8.);
60232        assert_eq_m128d(r, e);
60233    }
60234
60235    #[simd_test(enable = "avx512f")]
60236    fn test_mm_maskz_scalef_sd() {
60237        let a = _mm_set1_pd(1.);
60238        let b = _mm_set1_pd(3.);
60239        let r = _mm_maskz_scalef_sd(0, a, b);
60240        let e = _mm_set_pd(1., 0.);
60241        assert_eq_m128d(r, e);
60242        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
60243        let e = _mm_set_pd(1., 8.);
60244        assert_eq_m128d(r, e);
60245    }
60246
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fmadd_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        // Mask bit set: low lane = a*b + c = 1*2 + 3 = 5; upper lanes from `a`.
        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
60258
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: low lane zeroed; upper lanes from `a`.
        let r = _mm_maskz_fmadd_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        // Mask bit set: low lane = a*b + c = 1*2 + 3 = 5.
        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
60271
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        // Mask bit set: low lane = a*b + c = 1*2 + 3 = 5; upper lanes from `c`.
        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }
60283
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fmadd_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        // Mask bit set: low lane = a*b + c = 1*2 + 3 = 5; upper lane from `a`.
        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
60295
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: low lane zeroed; upper lane from `a`.
        let r = _mm_maskz_fmadd_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        // Mask bit set: low lane = a*b + c = 1*2 + 3 = 5.
        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
60308
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        // Mask bit set: low lane = a*b + c = 1*2 + 3 = 5; upper lane from `c`.
        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }
60320
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fmsub_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        // Mask bit set: low lane = a*b - c = 1*2 - 3 = -1; upper lanes from `a`.
        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
60332
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: low lane zeroed; upper lanes from `a`.
        let r = _mm_maskz_fmsub_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        // Mask bit set: low lane = a*b - c = 1*2 - 3 = -1.
        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
60345
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        // Mask bit set: low lane = a*b - c = 1*2 - 3 = -1; upper lanes from `c`.
        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }
60357
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fmsub_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        // Mask bit set: low lane = a*b - c = 1*2 - 3 = -1; upper lane from `a`.
        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
60369
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: low lane zeroed; upper lane from `a`.
        let r = _mm_maskz_fmsub_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        // Mask bit set: low lane = a*b - c = 1*2 - 3 = -1.
        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
60382
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        // Mask bit set: low lane = a*b - c = 1*2 - 3 = -1; upper lane from `c`.
        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }
60394
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        // Mask bit set: low lane = -(a*b) + c = -2 + 3 = 1; upper lanes from `a`.
        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
60406
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: low lane zeroed; upper lanes from `a`.
        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        // Mask bit set: low lane = -(a*b) + c = -2 + 3 = 1.
        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
60419
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        // Mask bit set: low lane = -(a*b) + c = -2 + 3 = 1; upper lanes from `c`.
        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }
60431
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        // Mask bit set: low lane = -(a*b) + c = -2 + 3 = 1; upper lane from `a`.
        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
60443
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: low lane zeroed; upper lane from `a`.
        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        // Mask bit set: low lane = -(a*b) + c = -2 + 3 = 1.
        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
60456
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        // Mask bit set: low lane = -(a*b) + c = -2 + 3 = 1; upper lane from `c`.
        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }
60468
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        // Mask bit set: low lane = -(a*b) - c = -2 - 3 = -5; upper lanes from `a`.
        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
60480
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: low lane zeroed; upper lanes from `a`.
        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        // Mask bit set: low lane = -(a*b) - c = -2 - 3 = -5.
        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
60493
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        // Mask bit set: low lane = -(a*b) - c = -2 - 3 = -5; upper lanes from `c`.
        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }
60505
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `a` unchanged.
        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        // Mask bit set: low lane = -(a*b) - c = -2 - 3 = -5; upper lane from `a`.
        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }
60517
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: low lane zeroed; upper lane from `a`.
        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        // Mask bit set: low lane = -(a*b) - c = -2 - 3 = -5.
        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }
60530
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        // Mask bit clear: result is `c` unchanged (mask3 copies from the third operand).
        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        // Mask bit set: low lane = -(a*b) - c = -2 - 3 = -5; upper lane from `c`.
        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }
60542
60543    #[simd_test(enable = "avx512f")]
60544    fn test_mm_add_round_ss() {
60545        let a = _mm_set_ps(1., 2., 10., 20.);
60546        let b = _mm_set_ps(3., 4., 30., 40.);
60547        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60548        let e = _mm_set_ps(1., 2., 10., 60.);
60549        assert_eq_m128(r, e);
60550    }
60551
60552    #[simd_test(enable = "avx512f")]
60553    fn test_mm_mask_add_round_ss() {
60554        let src = _mm_set_ps(10., 11., 100., 110.);
60555        let a = _mm_set_ps(1., 2., 10., 20.);
60556        let b = _mm_set_ps(3., 4., 30., 40.);
60557        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60558        let e = _mm_set_ps(1., 2., 10., 110.);
60559        assert_eq_m128(r, e);
60560        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60561            src, 0b11111111, a, b,
60562        );
60563        let e = _mm_set_ps(1., 2., 10., 60.);
60564        assert_eq_m128(r, e);
60565    }
60566
60567    #[simd_test(enable = "avx512f")]
60568    fn test_mm_maskz_add_round_ss() {
60569        let a = _mm_set_ps(1., 2., 10., 20.);
60570        let b = _mm_set_ps(3., 4., 30., 40.);
60571        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60572        let e = _mm_set_ps(1., 2., 10., 0.);
60573        assert_eq_m128(r, e);
60574        let r =
60575            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60576        let e = _mm_set_ps(1., 2., 10., 60.);
60577        assert_eq_m128(r, e);
60578    }
60579
60580    #[simd_test(enable = "avx512f")]
60581    fn test_mm_add_round_sd() {
60582        let a = _mm_set_pd(1., 2.);
60583        let b = _mm_set_pd(3., 4.);
60584        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60585        let e = _mm_set_pd(1., 6.);
60586        assert_eq_m128d(r, e);
60587    }
60588
60589    #[simd_test(enable = "avx512f")]
60590    fn test_mm_mask_add_round_sd() {
60591        let src = _mm_set_pd(10., 11.);
60592        let a = _mm_set_pd(1., 2.);
60593        let b = _mm_set_pd(3., 4.);
60594        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60595        let e = _mm_set_pd(1., 11.);
60596        assert_eq_m128d(r, e);
60597        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60598            src, 0b11111111, a, b,
60599        );
60600        let e = _mm_set_pd(1., 6.);
60601        assert_eq_m128d(r, e);
60602    }
60603
60604    #[simd_test(enable = "avx512f")]
60605    fn test_mm_maskz_add_round_sd() {
60606        let a = _mm_set_pd(1., 2.);
60607        let b = _mm_set_pd(3., 4.);
60608        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60609        let e = _mm_set_pd(1., 0.);
60610        assert_eq_m128d(r, e);
60611        let r =
60612            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60613        let e = _mm_set_pd(1., 6.);
60614        assert_eq_m128d(r, e);
60615    }
60616
60617    #[simd_test(enable = "avx512f")]
60618    fn test_mm_sub_round_ss() {
60619        let a = _mm_set_ps(1., 2., 10., 20.);
60620        let b = _mm_set_ps(3., 4., 30., 40.);
60621        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60622        let e = _mm_set_ps(1., 2., 10., -20.);
60623        assert_eq_m128(r, e);
60624    }
60625
60626    #[simd_test(enable = "avx512f")]
60627    fn test_mm_mask_sub_round_ss() {
60628        let src = _mm_set_ps(10., 11., 100., 110.);
60629        let a = _mm_set_ps(1., 2., 10., 20.);
60630        let b = _mm_set_ps(3., 4., 30., 40.);
60631        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60632        let e = _mm_set_ps(1., 2., 10., 110.);
60633        assert_eq_m128(r, e);
60634        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60635            src, 0b11111111, a, b,
60636        );
60637        let e = _mm_set_ps(1., 2., 10., -20.);
60638        assert_eq_m128(r, e);
60639    }
60640
60641    #[simd_test(enable = "avx512f")]
60642    fn test_mm_maskz_sub_round_ss() {
60643        let a = _mm_set_ps(1., 2., 10., 20.);
60644        let b = _mm_set_ps(3., 4., 30., 40.);
60645        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60646        let e = _mm_set_ps(1., 2., 10., 0.);
60647        assert_eq_m128(r, e);
60648        let r =
60649            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60650        let e = _mm_set_ps(1., 2., 10., -20.);
60651        assert_eq_m128(r, e);
60652    }
60653
60654    #[simd_test(enable = "avx512f")]
60655    fn test_mm_sub_round_sd() {
60656        let a = _mm_set_pd(1., 2.);
60657        let b = _mm_set_pd(3., 4.);
60658        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60659        let e = _mm_set_pd(1., -2.);
60660        assert_eq_m128d(r, e);
60661    }
60662
60663    #[simd_test(enable = "avx512f")]
60664    fn test_mm_mask_sub_round_sd() {
60665        let src = _mm_set_pd(10., 11.);
60666        let a = _mm_set_pd(1., 2.);
60667        let b = _mm_set_pd(3., 4.);
60668        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60669        let e = _mm_set_pd(1., 11.);
60670        assert_eq_m128d(r, e);
60671        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60672            src, 0b11111111, a, b,
60673        );
60674        let e = _mm_set_pd(1., -2.);
60675        assert_eq_m128d(r, e);
60676    }
60677
60678    #[simd_test(enable = "avx512f")]
60679    fn test_mm_maskz_sub_round_sd() {
60680        let a = _mm_set_pd(1., 2.);
60681        let b = _mm_set_pd(3., 4.);
60682        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60683        let e = _mm_set_pd(1., 0.);
60684        assert_eq_m128d(r, e);
60685        let r =
60686            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60687        let e = _mm_set_pd(1., -2.);
60688        assert_eq_m128d(r, e);
60689    }
60690
60691    #[simd_test(enable = "avx512f")]
60692    fn test_mm_mul_round_ss() {
60693        let a = _mm_set_ps(1., 2., 10., 20.);
60694        let b = _mm_set_ps(3., 4., 30., 40.);
60695        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60696        let e = _mm_set_ps(1., 2., 10., 800.);
60697        assert_eq_m128(r, e);
60698    }
60699
60700    #[simd_test(enable = "avx512f")]
60701    fn test_mm_mask_mul_round_ss() {
60702        let src = _mm_set_ps(10., 11., 100., 110.);
60703        let a = _mm_set_ps(1., 2., 10., 20.);
60704        let b = _mm_set_ps(3., 4., 30., 40.);
60705        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60706        let e = _mm_set_ps(1., 2., 10., 110.);
60707        assert_eq_m128(r, e);
60708        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60709            src, 0b11111111, a, b,
60710        );
60711        let e = _mm_set_ps(1., 2., 10., 800.);
60712        assert_eq_m128(r, e);
60713    }
60714
60715    #[simd_test(enable = "avx512f")]
60716    fn test_mm_maskz_mul_round_ss() {
60717        let a = _mm_set_ps(1., 2., 10., 20.);
60718        let b = _mm_set_ps(3., 4., 30., 40.);
60719        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60720        let e = _mm_set_ps(1., 2., 10., 0.);
60721        assert_eq_m128(r, e);
60722        let r =
60723            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60724        let e = _mm_set_ps(1., 2., 10., 800.);
60725        assert_eq_m128(r, e);
60726    }
60727
60728    #[simd_test(enable = "avx512f")]
60729    fn test_mm_mul_round_sd() {
60730        let a = _mm_set_pd(1., 2.);
60731        let b = _mm_set_pd(3., 4.);
60732        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60733        let e = _mm_set_pd(1., 8.);
60734        assert_eq_m128d(r, e);
60735    }
60736
60737    #[simd_test(enable = "avx512f")]
60738    fn test_mm_mask_mul_round_sd() {
60739        let src = _mm_set_pd(10., 11.);
60740        let a = _mm_set_pd(1., 2.);
60741        let b = _mm_set_pd(3., 4.);
60742        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60743        let e = _mm_set_pd(1., 11.);
60744        assert_eq_m128d(r, e);
60745        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60746            src, 0b11111111, a, b,
60747        );
60748        let e = _mm_set_pd(1., 8.);
60749        assert_eq_m128d(r, e);
60750    }
60751
60752    #[simd_test(enable = "avx512f")]
60753    fn test_mm_maskz_mul_round_sd() {
60754        let a = _mm_set_pd(1., 2.);
60755        let b = _mm_set_pd(3., 4.);
60756        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60757        let e = _mm_set_pd(1., 0.);
60758        assert_eq_m128d(r, e);
60759        let r =
60760            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60761        let e = _mm_set_pd(1., 8.);
60762        assert_eq_m128d(r, e);
60763    }
60764
60765    #[simd_test(enable = "avx512f")]
60766    fn test_mm_div_round_ss() {
60767        let a = _mm_set_ps(1., 2., 10., 20.);
60768        let b = _mm_set_ps(3., 4., 30., 40.);
60769        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60770        let e = _mm_set_ps(1., 2., 10., 0.5);
60771        assert_eq_m128(r, e);
60772    }
60773
60774    #[simd_test(enable = "avx512f")]
60775    fn test_mm_mask_div_round_ss() {
60776        let src = _mm_set_ps(10., 11., 100., 110.);
60777        let a = _mm_set_ps(1., 2., 10., 20.);
60778        let b = _mm_set_ps(3., 4., 30., 40.);
60779        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60780        let e = _mm_set_ps(1., 2., 10., 110.);
60781        assert_eq_m128(r, e);
60782        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60783            src, 0b11111111, a, b,
60784        );
60785        let e = _mm_set_ps(1., 2., 10., 0.5);
60786        assert_eq_m128(r, e);
60787    }
60788
60789    #[simd_test(enable = "avx512f")]
60790    fn test_mm_maskz_div_round_ss() {
60791        let a = _mm_set_ps(1., 2., 10., 20.);
60792        let b = _mm_set_ps(3., 4., 30., 40.);
60793        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60794        let e = _mm_set_ps(1., 2., 10., 0.);
60795        assert_eq_m128(r, e);
60796        let r =
60797            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60798        let e = _mm_set_ps(1., 2., 10., 0.5);
60799        assert_eq_m128(r, e);
60800    }
60801
60802    #[simd_test(enable = "avx512f")]
60803    fn test_mm_div_round_sd() {
60804        let a = _mm_set_pd(1., 2.);
60805        let b = _mm_set_pd(3., 4.);
60806        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60807        let e = _mm_set_pd(1., 0.5);
60808        assert_eq_m128d(r, e);
60809    }
60810
60811    #[simd_test(enable = "avx512f")]
60812    fn test_mm_mask_div_round_sd() {
60813        let src = _mm_set_pd(10., 11.);
60814        let a = _mm_set_pd(1., 2.);
60815        let b = _mm_set_pd(3., 4.);
60816        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60817        let e = _mm_set_pd(1., 11.);
60818        assert_eq_m128d(r, e);
60819        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60820            src, 0b11111111, a, b,
60821        );
60822        let e = _mm_set_pd(1., 0.5);
60823        assert_eq_m128d(r, e);
60824    }
60825
60826    #[simd_test(enable = "avx512f")]
60827    fn test_mm_maskz_div_round_sd() {
60828        let a = _mm_set_pd(1., 2.);
60829        let b = _mm_set_pd(3., 4.);
60830        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60831        let e = _mm_set_pd(1., 0.);
60832        assert_eq_m128d(r, e);
60833        let r =
60834            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60835        let e = _mm_set_pd(1., 0.5);
60836        assert_eq_m128d(r, e);
60837    }
60838
60839    #[simd_test(enable = "avx512f")]
60840    fn test_mm_max_round_ss() {
60841        let a = _mm_set_ps(0., 1., 2., 3.);
60842        let b = _mm_set_ps(4., 5., 6., 7.);
60843        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
60844        let e = _mm_set_ps(0., 1., 2., 7.);
60845        assert_eq_m128(r, e);
60846    }
60847
60848    #[simd_test(enable = "avx512f")]
60849    fn test_mm_mask_max_round_ss() {
60850        let a = _mm_set_ps(0., 1., 2., 3.);
60851        let b = _mm_set_ps(4., 5., 6., 7.);
60852        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60853        let e = _mm_set_ps(0., 1., 2., 3.);
60854        assert_eq_m128(r, e);
60855        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60856        let e = _mm_set_ps(0., 1., 2., 7.);
60857        assert_eq_m128(r, e);
60858    }
60859
60860    #[simd_test(enable = "avx512f")]
60861    fn test_mm_maskz_max_round_ss() {
60862        let a = _mm_set_ps(0., 1., 2., 3.);
60863        let b = _mm_set_ps(4., 5., 6., 7.);
60864        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60865        let e = _mm_set_ps(0., 1., 2., 0.);
60866        assert_eq_m128(r, e);
60867        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60868        let e = _mm_set_ps(0., 1., 2., 7.);
60869        assert_eq_m128(r, e);
60870    }
60871
60872    #[simd_test(enable = "avx512f")]
60873    fn test_mm_max_round_sd() {
60874        let a = _mm_set_pd(0., 1.);
60875        let b = _mm_set_pd(2., 3.);
60876        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
60877        let e = _mm_set_pd(0., 3.);
60878        assert_eq_m128d(r, e);
60879    }
60880
60881    #[simd_test(enable = "avx512f")]
60882    fn test_mm_mask_max_round_sd() {
60883        let a = _mm_set_pd(0., 1.);
60884        let b = _mm_set_pd(2., 3.);
60885        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60886        let e = _mm_set_pd(0., 1.);
60887        assert_eq_m128d(r, e);
60888        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60889        let e = _mm_set_pd(0., 3.);
60890        assert_eq_m128d(r, e);
60891    }
60892
60893    #[simd_test(enable = "avx512f")]
60894    fn test_mm_maskz_max_round_sd() {
60895        let a = _mm_set_pd(0., 1.);
60896        let b = _mm_set_pd(2., 3.);
60897        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60898        let e = _mm_set_pd(0., 0.);
60899        assert_eq_m128d(r, e);
60900        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60901        let e = _mm_set_pd(0., 3.);
60902        assert_eq_m128d(r, e);
60903    }
60904
60905    #[simd_test(enable = "avx512f")]
60906    fn test_mm_min_round_ss() {
60907        let a = _mm_set_ps(0., 1., 2., 3.);
60908        let b = _mm_set_ps(4., 5., 6., 7.);
60909        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
60910        let e = _mm_set_ps(0., 1., 2., 3.);
60911        assert_eq_m128(r, e);
60912    }
60913
60914    #[simd_test(enable = "avx512f")]
60915    fn test_mm_mask_min_round_ss() {
60916        let a = _mm_set_ps(0., 1., 2., 3.);
60917        let b = _mm_set_ps(4., 5., 6., 7.);
60918        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60919        let e = _mm_set_ps(0., 1., 2., 3.);
60920        assert_eq_m128(r, e);
60921        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60922        let e = _mm_set_ps(0., 1., 2., 3.);
60923        assert_eq_m128(r, e);
60924    }
60925
60926    #[simd_test(enable = "avx512f")]
60927    fn test_mm_maskz_min_round_ss() {
60928        let a = _mm_set_ps(0., 1., 2., 3.);
60929        let b = _mm_set_ps(4., 5., 6., 7.);
60930        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60931        let e = _mm_set_ps(0., 1., 2., 0.);
60932        assert_eq_m128(r, e);
60933        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60934        let e = _mm_set_ps(0., 1., 2., 3.);
60935        assert_eq_m128(r, e);
60936    }
60937
60938    #[simd_test(enable = "avx512f")]
60939    fn test_mm_min_round_sd() {
60940        let a = _mm_set_pd(0., 1.);
60941        let b = _mm_set_pd(2., 3.);
60942        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
60943        let e = _mm_set_pd(0., 1.);
60944        assert_eq_m128d(r, e);
60945    }
60946
60947    #[simd_test(enable = "avx512f")]
60948    fn test_mm_mask_min_round_sd() {
60949        let a = _mm_set_pd(0., 1.);
60950        let b = _mm_set_pd(2., 3.);
60951        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60952        let e = _mm_set_pd(0., 1.);
60953        assert_eq_m128d(r, e);
60954        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60955        let e = _mm_set_pd(0., 1.);
60956        assert_eq_m128d(r, e);
60957    }
60958
60959    #[simd_test(enable = "avx512f")]
60960    fn test_mm_maskz_min_round_sd() {
60961        let a = _mm_set_pd(0., 1.);
60962        let b = _mm_set_pd(2., 3.);
60963        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60964        let e = _mm_set_pd(0., 0.);
60965        assert_eq_m128d(r, e);
60966        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60967        let e = _mm_set_pd(0., 1.);
60968        assert_eq_m128d(r, e);
60969    }
60970
60971    #[simd_test(enable = "avx512f")]
60972    fn test_mm_sqrt_round_ss() {
60973        let a = _mm_set_ps(1., 2., 10., 20.);
60974        let b = _mm_set_ps(3., 4., 30., 4.);
60975        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60976        let e = _mm_set_ps(1., 2., 10., 2.);
60977        assert_eq_m128(r, e);
60978    }
60979
60980    #[simd_test(enable = "avx512f")]
60981    fn test_mm_mask_sqrt_round_ss() {
60982        let src = _mm_set_ps(10., 11., 100., 110.);
60983        let a = _mm_set_ps(1., 2., 10., 20.);
60984        let b = _mm_set_ps(3., 4., 30., 4.);
60985        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60986        let e = _mm_set_ps(1., 2., 10., 110.);
60987        assert_eq_m128(r, e);
60988        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60989            src, 0b11111111, a, b,
60990        );
60991        let e = _mm_set_ps(1., 2., 10., 2.);
60992        assert_eq_m128(r, e);
60993    }
60994
60995    #[simd_test(enable = "avx512f")]
60996    fn test_mm_maskz_sqrt_round_ss() {
60997        let a = _mm_set_ps(1., 2., 10., 20.);
60998        let b = _mm_set_ps(3., 4., 30., 4.);
60999        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
61000        let e = _mm_set_ps(1., 2., 10., 0.);
61001        assert_eq_m128(r, e);
61002        let r =
61003            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
61004        let e = _mm_set_ps(1., 2., 10., 2.);
61005        assert_eq_m128(r, e);
61006    }
61007
61008    #[simd_test(enable = "avx512f")]
61009    fn test_mm_sqrt_round_sd() {
61010        let a = _mm_set_pd(1., 2.);
61011        let b = _mm_set_pd(3., 4.);
61012        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
61013        let e = _mm_set_pd(1., 2.);
61014        assert_eq_m128d(r, e);
61015    }
61016
61017    #[simd_test(enable = "avx512f")]
61018    fn test_mm_mask_sqrt_round_sd() {
61019        let src = _mm_set_pd(10., 11.);
61020        let a = _mm_set_pd(1., 2.);
61021        let b = _mm_set_pd(3., 4.);
61022        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
61023        let e = _mm_set_pd(1., 11.);
61024        assert_eq_m128d(r, e);
61025        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
61026            src, 0b11111111, a, b,
61027        );
61028        let e = _mm_set_pd(1., 2.);
61029        assert_eq_m128d(r, e);
61030    }
61031
61032    #[simd_test(enable = "avx512f")]
61033    fn test_mm_maskz_sqrt_round_sd() {
61034        let a = _mm_set_pd(1., 2.);
61035        let b = _mm_set_pd(3., 4.);
61036        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
61037        let e = _mm_set_pd(1., 0.);
61038        assert_eq_m128d(r, e);
61039        let r =
61040            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
61041        let e = _mm_set_pd(1., 2.);
61042        assert_eq_m128d(r, e);
61043    }
61044
61045    #[simd_test(enable = "avx512f")]
61046    fn test_mm_getexp_round_ss() {
61047        let a = _mm_set1_ps(2.);
61048        let b = _mm_set1_ps(3.);
61049        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
61050        let e = _mm_set_ps(2., 2., 2., 1.);
61051        assert_eq_m128(r, e);
61052    }
61053
61054    #[simd_test(enable = "avx512f")]
61055    fn test_mm_mask_getexp_round_ss() {
61056        let a = _mm_set1_ps(2.);
61057        let b = _mm_set1_ps(3.);
61058        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
61059        let e = _mm_set_ps(2., 2., 2., 2.);
61060        assert_eq_m128(r, e);
61061        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
61062        let e = _mm_set_ps(2., 2., 2., 1.);
61063        assert_eq_m128(r, e);
61064    }
61065
61066    #[simd_test(enable = "avx512f")]
61067    fn test_mm_maskz_getexp_round_ss() {
61068        let a = _mm_set1_ps(2.);
61069        let b = _mm_set1_ps(3.);
61070        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
61071        let e = _mm_set_ps(2., 2., 2., 0.);
61072        assert_eq_m128(r, e);
61073        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
61074        let e = _mm_set_ps(2., 2., 2., 1.);
61075        assert_eq_m128(r, e);
61076    }
61077
61078    #[simd_test(enable = "avx512f")]
61079    fn test_mm_getexp_round_sd() {
61080        let a = _mm_set1_pd(2.);
61081        let b = _mm_set1_pd(3.);
61082        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
61083        let e = _mm_set_pd(2., 1.);
61084        assert_eq_m128d(r, e);
61085    }
61086
61087    #[simd_test(enable = "avx512f")]
61088    fn test_mm_mask_getexp_round_sd() {
61089        let a = _mm_set1_pd(2.);
61090        let b = _mm_set1_pd(3.);
61091        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
61092        let e = _mm_set_pd(2., 2.);
61093        assert_eq_m128d(r, e);
61094        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
61095        let e = _mm_set_pd(2., 1.);
61096        assert_eq_m128d(r, e);
61097    }
61098
61099    #[simd_test(enable = "avx512f")]
61100    fn test_mm_maskz_getexp_round_sd() {
61101        let a = _mm_set1_pd(2.);
61102        let b = _mm_set1_pd(3.);
61103        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
61104        let e = _mm_set_pd(2., 0.);
61105        assert_eq_m128d(r, e);
61106        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
61107        let e = _mm_set_pd(2., 1.);
61108        assert_eq_m128d(r, e);
61109    }
61110
61111    #[simd_test(enable = "avx512f")]
61112    fn test_mm_getmant_round_ss() {
61113        let a = _mm_set1_ps(20.);
61114        let b = _mm_set1_ps(10.);
61115        let r =
61116            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
61117                a, b,
61118            );
61119        let e = _mm_set_ps(20., 20., 20., 1.25);
61120        assert_eq_m128(r, e);
61121    }
61122
61123    #[simd_test(enable = "avx512f")]
61124    fn test_mm_mask_getmant_round_ss() {
61125        let a = _mm_set1_ps(20.);
61126        let b = _mm_set1_ps(10.);
61127        let r = _mm_mask_getmant_round_ss::<
61128            _MM_MANT_NORM_1_2,
61129            _MM_MANT_SIGN_SRC,
61130            _MM_FROUND_CUR_DIRECTION,
61131        >(a, 0, a, b);
61132        let e = _mm_set_ps(20., 20., 20., 20.);
61133        assert_eq_m128(r, e);
61134        let r = _mm_mask_getmant_round_ss::<
61135            _MM_MANT_NORM_1_2,
61136            _MM_MANT_SIGN_SRC,
61137            _MM_FROUND_CUR_DIRECTION,
61138        >(a, 0b11111111, a, b);
61139        let e = _mm_set_ps(20., 20., 20., 1.25);
61140        assert_eq_m128(r, e);
61141    }
61142
61143    #[simd_test(enable = "avx512f")]
61144    fn test_mm_maskz_getmant_round_ss() {
61145        let a = _mm_set1_ps(20.);
61146        let b = _mm_set1_ps(10.);
61147        let r = _mm_maskz_getmant_round_ss::<
61148            _MM_MANT_NORM_1_2,
61149            _MM_MANT_SIGN_SRC,
61150            _MM_FROUND_CUR_DIRECTION,
61151        >(0, a, b);
61152        let e = _mm_set_ps(20., 20., 20., 0.);
61153        assert_eq_m128(r, e);
61154        let r = _mm_maskz_getmant_round_ss::<
61155            _MM_MANT_NORM_1_2,
61156            _MM_MANT_SIGN_SRC,
61157            _MM_FROUND_CUR_DIRECTION,
61158        >(0b11111111, a, b);
61159        let e = _mm_set_ps(20., 20., 20., 1.25);
61160        assert_eq_m128(r, e);
61161    }
61162
61163    #[simd_test(enable = "avx512f")]
61164    fn test_mm_getmant_round_sd() {
61165        let a = _mm_set1_pd(20.);
61166        let b = _mm_set1_pd(10.);
61167        let r =
61168            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
61169                a, b,
61170            );
61171        let e = _mm_set_pd(20., 1.25);
61172        assert_eq_m128d(r, e);
61173    }
61174
61175    #[simd_test(enable = "avx512f")]
61176    fn test_mm_mask_getmant_round_sd() {
61177        let a = _mm_set1_pd(20.);
61178        let b = _mm_set1_pd(10.);
61179        let r = _mm_mask_getmant_round_sd::<
61180            _MM_MANT_NORM_1_2,
61181            _MM_MANT_SIGN_SRC,
61182            _MM_FROUND_CUR_DIRECTION,
61183        >(a, 0, a, b);
61184        let e = _mm_set_pd(20., 20.);
61185        assert_eq_m128d(r, e);
61186        let r = _mm_mask_getmant_round_sd::<
61187            _MM_MANT_NORM_1_2,
61188            _MM_MANT_SIGN_SRC,
61189            _MM_FROUND_CUR_DIRECTION,
61190        >(a, 0b11111111, a, b);
61191        let e = _mm_set_pd(20., 1.25);
61192        assert_eq_m128d(r, e);
61193    }
61194
61195    #[simd_test(enable = "avx512f")]
61196    fn test_mm_maskz_getmant_round_sd() {
61197        let a = _mm_set1_pd(20.);
61198        let b = _mm_set1_pd(10.);
61199        let r = _mm_maskz_getmant_round_sd::<
61200            _MM_MANT_NORM_1_2,
61201            _MM_MANT_SIGN_SRC,
61202            _MM_FROUND_CUR_DIRECTION,
61203        >(0, a, b);
61204        let e = _mm_set_pd(20., 0.);
61205        assert_eq_m128d(r, e);
61206        let r = _mm_maskz_getmant_round_sd::<
61207            _MM_MANT_NORM_1_2,
61208            _MM_MANT_SIGN_SRC,
61209            _MM_FROUND_CUR_DIRECTION,
61210        >(0b11111111, a, b);
61211        let e = _mm_set_pd(20., 1.25);
61212        assert_eq_m128d(r, e);
61213    }
61214
61215    #[simd_test(enable = "avx512f")]
61216    fn test_mm_roundscale_round_ss() {
61217        let a = _mm_set1_ps(2.2);
61218        let b = _mm_set1_ps(1.1);
61219        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
61220        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
61221        assert_eq_m128(r, e);
61222    }
61223
61224    #[simd_test(enable = "avx512f")]
61225    fn test_mm_mask_roundscale_round_ss() {
61226        let a = _mm_set1_ps(2.2);
61227        let b = _mm_set1_ps(1.1);
61228        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
61229        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
61230        assert_eq_m128(r, e);
61231        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
61232        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
61233        assert_eq_m128(r, e);
61234    }
61235
61236    #[simd_test(enable = "avx512f")]
61237    fn test_mm_maskz_roundscale_round_ss() {
61238        let a = _mm_set1_ps(2.2);
61239        let b = _mm_set1_ps(1.1);
61240        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
61241        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
61242        assert_eq_m128(r, e);
61243        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
61244        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
61245        assert_eq_m128(r, e);
61246    }
61247
61248    #[simd_test(enable = "avx512f")]
61249    fn test_mm_roundscale_round_sd() {
61250        let a = _mm_set1_pd(2.2);
61251        let b = _mm_set1_pd(1.1);
61252        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
61253        let e = _mm_set_pd(2.2, 1.0);
61254        assert_eq_m128d(r, e);
61255    }
61256
61257    #[simd_test(enable = "avx512f")]
61258    fn test_mm_mask_roundscale_round_sd() {
61259        let a = _mm_set1_pd(2.2);
61260        let b = _mm_set1_pd(1.1);
61261        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
61262        let e = _mm_set_pd(2.2, 2.2);
61263        assert_eq_m128d(r, e);
61264        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
61265        let e = _mm_set_pd(2.2, 1.0);
61266        assert_eq_m128d(r, e);
61267    }
61268
61269    #[simd_test(enable = "avx512f")]
61270    fn test_mm_maskz_roundscale_round_sd() {
61271        let a = _mm_set1_pd(2.2);
61272        let b = _mm_set1_pd(1.1);
61273        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
61274        let e = _mm_set_pd(2.2, 0.0);
61275        assert_eq_m128d(r, e);
61276        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
61277        let e = _mm_set_pd(2.2, 1.0);
61278        assert_eq_m128d(r, e);
61279    }
61280
61281    #[simd_test(enable = "avx512f")]
61282    fn test_mm_scalef_round_ss() {
61283        let a = _mm_set1_ps(1.);
61284        let b = _mm_set1_ps(3.);
61285        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
61286        let e = _mm_set_ps(1., 1., 1., 8.);
61287        assert_eq_m128(r, e);
61288    }
61289
61290    #[simd_test(enable = "avx512f")]
61291    fn test_mm_mask_scalef_round_ss() {
61292        let a = _mm_set1_ps(1.);
61293        let b = _mm_set1_ps(3.);
61294        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61295            a, 0, a, b,
61296        );
61297        let e = _mm_set_ps(1., 1., 1., 1.);
61298        assert_eq_m128(r, e);
61299        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61300            a, 0b11111111, a, b,
61301        );
61302        let e = _mm_set_ps(1., 1., 1., 8.);
61303        assert_eq_m128(r, e);
61304    }
61305
61306    #[simd_test(enable = "avx512f")]
61307    fn test_mm_maskz_scalef_round_ss() {
61308        let a = _mm_set1_ps(1.);
61309        let b = _mm_set1_ps(3.);
61310        let r =
61311            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
61312        let e = _mm_set_ps(1., 1., 1., 0.);
61313        assert_eq_m128(r, e);
61314        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61315            0b11111111, a, b,
61316        );
61317        let e = _mm_set_ps(1., 1., 1., 8.);
61318        assert_eq_m128(r, e);
61319    }
61320
61321    #[simd_test(enable = "avx512f")]
61322    fn test_mm_scalef_round_sd() {
61323        let a = _mm_set1_pd(1.);
61324        let b = _mm_set1_pd(3.);
61325        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
61326        let e = _mm_set_pd(1., 8.);
61327        assert_eq_m128d(r, e);
61328    }
61329
61330    #[simd_test(enable = "avx512f")]
61331    fn test_mm_mask_scalef_round_sd() {
61332        let a = _mm_set1_pd(1.);
61333        let b = _mm_set1_pd(3.);
61334        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61335            a, 0, a, b,
61336        );
61337        let e = _mm_set_pd(1., 1.);
61338        assert_eq_m128d(r, e);
61339        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61340            a, 0b11111111, a, b,
61341        );
61342        let e = _mm_set_pd(1., 8.);
61343        assert_eq_m128d(r, e);
61344    }
61345
61346    #[simd_test(enable = "avx512f")]
61347    fn test_mm_maskz_scalef_round_sd() {
61348        let a = _mm_set1_pd(1.);
61349        let b = _mm_set1_pd(3.);
61350        let r =
61351            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
61352        let e = _mm_set_pd(1., 0.);
61353        assert_eq_m128d(r, e);
61354        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61355            0b11111111, a, b,
61356        );
61357        let e = _mm_set_pd(1., 8.);
61358        assert_eq_m128d(r, e);
61359    }
61360
61361    #[simd_test(enable = "avx512f")]
61362    fn test_mm_fmadd_round_ss() {
61363        let a = _mm_set1_ps(1.);
61364        let b = _mm_set1_ps(2.);
61365        let c = _mm_set1_ps(3.);
61366        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61367        let e = _mm_set_ps(1., 1., 1., 5.);
61368        assert_eq_m128(r, e);
61369    }
61370
61371    #[simd_test(enable = "avx512f")]
61372    fn test_mm_mask_fmadd_round_ss() {
61373        let a = _mm_set1_ps(1.);
61374        let b = _mm_set1_ps(2.);
61375        let c = _mm_set1_ps(3.);
61376        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61377            a, 0, b, c,
61378        );
61379        assert_eq_m128(r, a);
61380        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61381            a, 0b11111111, b, c,
61382        );
61383        let e = _mm_set_ps(1., 1., 1., 5.);
61384        assert_eq_m128(r, e);
61385    }
61386
61387    #[simd_test(enable = "avx512f")]
61388    fn test_mm_maskz_fmadd_round_ss() {
61389        let a = _mm_set1_ps(1.);
61390        let b = _mm_set1_ps(2.);
61391        let c = _mm_set1_ps(3.);
61392        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61393            0, a, b, c,
61394        );
61395        let e = _mm_set_ps(1., 1., 1., 0.);
61396        assert_eq_m128(r, e);
61397        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61398            0b11111111, a, b, c,
61399        );
61400        let e = _mm_set_ps(1., 1., 1., 5.);
61401        assert_eq_m128(r, e);
61402    }
61403
61404    #[simd_test(enable = "avx512f")]
61405    fn test_mm_mask3_fmadd_round_ss() {
61406        let a = _mm_set1_ps(1.);
61407        let b = _mm_set1_ps(2.);
61408        let c = _mm_set1_ps(3.);
61409        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61410            a, b, c, 0,
61411        );
61412        assert_eq_m128(r, c);
61413        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61414            a, b, c, 0b11111111,
61415        );
61416        let e = _mm_set_ps(3., 3., 3., 5.);
61417        assert_eq_m128(r, e);
61418    }
61419
61420    #[simd_test(enable = "avx512f")]
61421    fn test_mm_fmadd_round_sd() {
61422        let a = _mm_set1_pd(1.);
61423        let b = _mm_set1_pd(2.);
61424        let c = _mm_set1_pd(3.);
61425        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61426        let e = _mm_set_pd(1., 5.);
61427        assert_eq_m128d(r, e);
61428    }
61429
61430    #[simd_test(enable = "avx512f")]
61431    fn test_mm_mask_fmadd_round_sd() {
61432        let a = _mm_set1_pd(1.);
61433        let b = _mm_set1_pd(2.);
61434        let c = _mm_set1_pd(3.);
61435        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61436            a, 0, b, c,
61437        );
61438        assert_eq_m128d(r, a);
61439        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61440            a, 0b11111111, b, c,
61441        );
61442        let e = _mm_set_pd(1., 5.);
61443        assert_eq_m128d(r, e);
61444    }
61445
61446    #[simd_test(enable = "avx512f")]
61447    fn test_mm_maskz_fmadd_round_sd() {
61448        let a = _mm_set1_pd(1.);
61449        let b = _mm_set1_pd(2.);
61450        let c = _mm_set1_pd(3.);
61451        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61452            0, a, b, c,
61453        );
61454        let e = _mm_set_pd(1., 0.);
61455        assert_eq_m128d(r, e);
61456        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61457            0b11111111, a, b, c,
61458        );
61459        let e = _mm_set_pd(1., 5.);
61460        assert_eq_m128d(r, e);
61461    }
61462
61463    #[simd_test(enable = "avx512f")]
61464    fn test_mm_mask3_fmadd_round_sd() {
61465        let a = _mm_set1_pd(1.);
61466        let b = _mm_set1_pd(2.);
61467        let c = _mm_set1_pd(3.);
61468        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61469            a, b, c, 0,
61470        );
61471        assert_eq_m128d(r, c);
61472        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61473            a, b, c, 0b11111111,
61474        );
61475        let e = _mm_set_pd(3., 5.);
61476        assert_eq_m128d(r, e);
61477    }
61478
61479    #[simd_test(enable = "avx512f")]
61480    fn test_mm_fmsub_round_ss() {
61481        let a = _mm_set1_ps(1.);
61482        let b = _mm_set1_ps(2.);
61483        let c = _mm_set1_ps(3.);
61484        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61485        let e = _mm_set_ps(1., 1., 1., -1.);
61486        assert_eq_m128(r, e);
61487    }
61488
61489    #[simd_test(enable = "avx512f")]
61490    fn test_mm_mask_fmsub_round_ss() {
61491        let a = _mm_set1_ps(1.);
61492        let b = _mm_set1_ps(2.);
61493        let c = _mm_set1_ps(3.);
61494        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61495            a, 0, b, c,
61496        );
61497        assert_eq_m128(r, a);
61498        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61499            a, 0b11111111, b, c,
61500        );
61501        let e = _mm_set_ps(1., 1., 1., -1.);
61502        assert_eq_m128(r, e);
61503    }
61504
61505    #[simd_test(enable = "avx512f")]
61506    fn test_mm_maskz_fmsub_round_ss() {
61507        let a = _mm_set1_ps(1.);
61508        let b = _mm_set1_ps(2.);
61509        let c = _mm_set1_ps(3.);
61510        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61511            0, a, b, c,
61512        );
61513        let e = _mm_set_ps(1., 1., 1., 0.);
61514        assert_eq_m128(r, e);
61515        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61516            0b11111111, a, b, c,
61517        );
61518        let e = _mm_set_ps(1., 1., 1., -1.);
61519        assert_eq_m128(r, e);
61520    }
61521
61522    #[simd_test(enable = "avx512f")]
61523    fn test_mm_mask3_fmsub_round_ss() {
61524        let a = _mm_set1_ps(1.);
61525        let b = _mm_set1_ps(2.);
61526        let c = _mm_set1_ps(3.);
61527        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61528            a, b, c, 0,
61529        );
61530        assert_eq_m128(r, c);
61531        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61532            a, b, c, 0b11111111,
61533        );
61534        let e = _mm_set_ps(3., 3., 3., -1.);
61535        assert_eq_m128(r, e);
61536    }
61537
61538    #[simd_test(enable = "avx512f")]
61539    fn test_mm_fmsub_round_sd() {
61540        let a = _mm_set1_pd(1.);
61541        let b = _mm_set1_pd(2.);
61542        let c = _mm_set1_pd(3.);
61543        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61544        let e = _mm_set_pd(1., -1.);
61545        assert_eq_m128d(r, e);
61546    }
61547
61548    #[simd_test(enable = "avx512f")]
61549    fn test_mm_mask_fmsub_round_sd() {
61550        let a = _mm_set1_pd(1.);
61551        let b = _mm_set1_pd(2.);
61552        let c = _mm_set1_pd(3.);
61553        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61554            a, 0, b, c,
61555        );
61556        assert_eq_m128d(r, a);
61557        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61558            a, 0b11111111, b, c,
61559        );
61560        let e = _mm_set_pd(1., -1.);
61561        assert_eq_m128d(r, e);
61562    }
61563
61564    #[simd_test(enable = "avx512f")]
61565    fn test_mm_maskz_fmsub_round_sd() {
61566        let a = _mm_set1_pd(1.);
61567        let b = _mm_set1_pd(2.);
61568        let c = _mm_set1_pd(3.);
61569        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61570            0, a, b, c,
61571        );
61572        let e = _mm_set_pd(1., 0.);
61573        assert_eq_m128d(r, e);
61574        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61575            0b11111111, a, b, c,
61576        );
61577        let e = _mm_set_pd(1., -1.);
61578        assert_eq_m128d(r, e);
61579    }
61580
61581    #[simd_test(enable = "avx512f")]
61582    fn test_mm_mask3_fmsub_round_sd() {
61583        let a = _mm_set1_pd(1.);
61584        let b = _mm_set1_pd(2.);
61585        let c = _mm_set1_pd(3.);
61586        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61587            a, b, c, 0,
61588        );
61589        assert_eq_m128d(r, c);
61590        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61591            a, b, c, 0b11111111,
61592        );
61593        let e = _mm_set_pd(3., -1.);
61594        assert_eq_m128d(r, e);
61595    }
61596
61597    #[simd_test(enable = "avx512f")]
61598    fn test_mm_fnmadd_round_ss() {
61599        let a = _mm_set1_ps(1.);
61600        let b = _mm_set1_ps(2.);
61601        let c = _mm_set1_ps(3.);
61602        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61603        let e = _mm_set_ps(1., 1., 1., 1.);
61604        assert_eq_m128(r, e);
61605    }
61606
61607    #[simd_test(enable = "avx512f")]
61608    fn test_mm_mask_fnmadd_round_ss() {
61609        let a = _mm_set1_ps(1.);
61610        let b = _mm_set1_ps(2.);
61611        let c = _mm_set1_ps(3.);
61612        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61613            a, 0, b, c,
61614        );
61615        assert_eq_m128(r, a);
61616        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61617            a, 0b11111111, b, c,
61618        );
61619        let e = _mm_set_ps(1., 1., 1., 1.);
61620        assert_eq_m128(r, e);
61621    }
61622
61623    #[simd_test(enable = "avx512f")]
61624    fn test_mm_maskz_fnmadd_round_ss() {
61625        let a = _mm_set1_ps(1.);
61626        let b = _mm_set1_ps(2.);
61627        let c = _mm_set1_ps(3.);
61628        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61629            0, a, b, c,
61630        );
61631        let e = _mm_set_ps(1., 1., 1., 0.);
61632        assert_eq_m128(r, e);
61633        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61634            0b11111111, a, b, c,
61635        );
61636        let e = _mm_set_ps(1., 1., 1., 1.);
61637        assert_eq_m128(r, e);
61638    }
61639
61640    #[simd_test(enable = "avx512f")]
61641    fn test_mm_mask3_fnmadd_round_ss() {
61642        let a = _mm_set1_ps(1.);
61643        let b = _mm_set1_ps(2.);
61644        let c = _mm_set1_ps(3.);
61645        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61646            a, b, c, 0,
61647        );
61648        assert_eq_m128(r, c);
61649        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61650            a, b, c, 0b11111111,
61651        );
61652        let e = _mm_set_ps(3., 3., 3., 1.);
61653        assert_eq_m128(r, e);
61654    }
61655
61656    #[simd_test(enable = "avx512f")]
61657    fn test_mm_fnmadd_round_sd() {
61658        let a = _mm_set1_pd(1.);
61659        let b = _mm_set1_pd(2.);
61660        let c = _mm_set1_pd(3.);
61661        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61662        let e = _mm_set_pd(1., 1.);
61663        assert_eq_m128d(r, e);
61664    }
61665
61666    #[simd_test(enable = "avx512f")]
61667    fn test_mm_mask_fnmadd_round_sd() {
61668        let a = _mm_set1_pd(1.);
61669        let b = _mm_set1_pd(2.);
61670        let c = _mm_set1_pd(3.);
61671        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61672            a, 0, b, c,
61673        );
61674        assert_eq_m128d(r, a);
61675        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61676            a, 0b11111111, b, c,
61677        );
61678        let e = _mm_set_pd(1., 1.);
61679        assert_eq_m128d(r, e);
61680    }
61681
61682    #[simd_test(enable = "avx512f")]
61683    fn test_mm_maskz_fnmadd_round_sd() {
61684        let a = _mm_set1_pd(1.);
61685        let b = _mm_set1_pd(2.);
61686        let c = _mm_set1_pd(3.);
61687        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61688            0, a, b, c,
61689        );
61690        let e = _mm_set_pd(1., 0.);
61691        assert_eq_m128d(r, e);
61692        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61693            0b11111111, a, b, c,
61694        );
61695        let e = _mm_set_pd(1., 1.);
61696        assert_eq_m128d(r, e);
61697    }
61698
61699    #[simd_test(enable = "avx512f")]
61700    fn test_mm_mask3_fnmadd_round_sd() {
61701        let a = _mm_set1_pd(1.);
61702        let b = _mm_set1_pd(2.);
61703        let c = _mm_set1_pd(3.);
61704        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61705            a, b, c, 0,
61706        );
61707        assert_eq_m128d(r, c);
61708        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61709            a, b, c, 0b11111111,
61710        );
61711        let e = _mm_set_pd(3., 1.);
61712        assert_eq_m128d(r, e);
61713    }
61714
61715    #[simd_test(enable = "avx512f")]
61716    fn test_mm_fnmsub_round_ss() {
61717        let a = _mm_set1_ps(1.);
61718        let b = _mm_set1_ps(2.);
61719        let c = _mm_set1_ps(3.);
61720        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61721        let e = _mm_set_ps(1., 1., 1., -5.);
61722        assert_eq_m128(r, e);
61723    }
61724
61725    #[simd_test(enable = "avx512f")]
61726    fn test_mm_mask_fnmsub_round_ss() {
61727        let a = _mm_set1_ps(1.);
61728        let b = _mm_set1_ps(2.);
61729        let c = _mm_set1_ps(3.);
61730        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61731            a, 0, b, c,
61732        );
61733        assert_eq_m128(r, a);
61734        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61735            a, 0b11111111, b, c,
61736        );
61737        let e = _mm_set_ps(1., 1., 1., -5.);
61738        assert_eq_m128(r, e);
61739    }
61740
61741    #[simd_test(enable = "avx512f")]
61742    fn test_mm_maskz_fnmsub_round_ss() {
61743        let a = _mm_set1_ps(1.);
61744        let b = _mm_set1_ps(2.);
61745        let c = _mm_set1_ps(3.);
61746        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61747            0, a, b, c,
61748        );
61749        let e = _mm_set_ps(1., 1., 1., 0.);
61750        assert_eq_m128(r, e);
61751        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61752            0b11111111, a, b, c,
61753        );
61754        let e = _mm_set_ps(1., 1., 1., -5.);
61755        assert_eq_m128(r, e);
61756    }
61757
61758    #[simd_test(enable = "avx512f")]
61759    fn test_mm_mask3_fnmsub_round_ss() {
61760        let a = _mm_set1_ps(1.);
61761        let b = _mm_set1_ps(2.);
61762        let c = _mm_set1_ps(3.);
61763        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61764            a, b, c, 0,
61765        );
61766        assert_eq_m128(r, c);
61767        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61768            a, b, c, 0b11111111,
61769        );
61770        let e = _mm_set_ps(3., 3., 3., -5.);
61771        assert_eq_m128(r, e);
61772    }
61773
61774    #[simd_test(enable = "avx512f")]
61775    fn test_mm_fnmsub_round_sd() {
61776        let a = _mm_set1_pd(1.);
61777        let b = _mm_set1_pd(2.);
61778        let c = _mm_set1_pd(3.);
61779        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61780        let e = _mm_set_pd(1., -5.);
61781        assert_eq_m128d(r, e);
61782    }
61783
61784    #[simd_test(enable = "avx512f")]
61785    fn test_mm_mask_fnmsub_round_sd() {
61786        let a = _mm_set1_pd(1.);
61787        let b = _mm_set1_pd(2.);
61788        let c = _mm_set1_pd(3.);
61789        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61790            a, 0, b, c,
61791        );
61792        assert_eq_m128d(r, a);
61793        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61794            a, 0b11111111, b, c,
61795        );
61796        let e = _mm_set_pd(1., -5.);
61797        assert_eq_m128d(r, e);
61798    }
61799
61800    #[simd_test(enable = "avx512f")]
61801    fn test_mm_maskz_fnmsub_round_sd() {
61802        let a = _mm_set1_pd(1.);
61803        let b = _mm_set1_pd(2.);
61804        let c = _mm_set1_pd(3.);
61805        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61806            0, a, b, c,
61807        );
61808        let e = _mm_set_pd(1., 0.);
61809        assert_eq_m128d(r, e);
61810        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61811            0b11111111, a, b, c,
61812        );
61813        let e = _mm_set_pd(1., -5.);
61814        assert_eq_m128d(r, e);
61815    }
61816
61817    #[simd_test(enable = "avx512f")]
61818    fn test_mm_mask3_fnmsub_round_sd() {
61819        let a = _mm_set1_pd(1.);
61820        let b = _mm_set1_pd(2.);
61821        let c = _mm_set1_pd(3.);
61822        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61823            a, b, c, 0,
61824        );
61825        assert_eq_m128d(r, c);
61826        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
61827            a, b, c, 0b11111111,
61828        );
61829        let e = _mm_set_pd(3., -5.);
61830        assert_eq_m128d(r, e);
61831    }
61832
61833    #[simd_test(enable = "avx512f")]
61834    fn test_mm_fixupimm_ss() {
61835        let a = _mm_set_ps(0., 0., 0., f32::NAN);
61836        let b = _mm_set1_ps(f32::MAX);
61837        let c = _mm_set1_epi32(i32::MAX);
61838        let r = _mm_fixupimm_ss::<5>(a, b, c);
61839        let e = _mm_set_ps(0., 0., 0., -0.0);
61840        assert_eq_m128(r, e);
61841    }
61842
61843    #[simd_test(enable = "avx512f")]
61844    fn test_mm_mask_fixupimm_ss() {
61845        let a = _mm_set_ps(0., 0., 0., f32::NAN);
61846        let b = _mm_set1_ps(f32::MAX);
61847        let c = _mm_set1_epi32(i32::MAX);
61848        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
61849        let e = _mm_set_ps(0., 0., 0., -0.0);
61850        assert_eq_m128(r, e);
61851    }
61852
61853    #[simd_test(enable = "avx512f")]
61854    fn test_mm_maskz_fixupimm_ss() {
61855        let a = _mm_set_ps(0., 0., 0., f32::NAN);
61856        let b = _mm_set1_ps(f32::MAX);
61857        let c = _mm_set1_epi32(i32::MAX);
61858        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
61859        let e = _mm_set_ps(0., 0., 0., 0.0);
61860        assert_eq_m128(r, e);
61861        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
61862        let e = _mm_set_ps(0., 0., 0., -0.0);
61863        assert_eq_m128(r, e);
61864    }
61865
61866    #[simd_test(enable = "avx512f")]
61867    fn test_mm_fixupimm_sd() {
61868        let a = _mm_set_pd(0., f64::NAN);
61869        let b = _mm_set1_pd(f64::MAX);
61870        let c = _mm_set1_epi64x(i32::MAX as i64);
61871        let r = _mm_fixupimm_sd::<5>(a, b, c);
61872        let e = _mm_set_pd(0., -0.0);
61873        assert_eq_m128d(r, e);
61874    }
61875
61876    #[simd_test(enable = "avx512f")]
61877    fn test_mm_mask_fixupimm_sd() {
61878        let a = _mm_set_pd(0., f64::NAN);
61879        let b = _mm_set1_pd(f64::MAX);
61880        let c = _mm_set1_epi64x(i32::MAX as i64);
61881        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
61882        let e = _mm_set_pd(0., -0.0);
61883        assert_eq_m128d(r, e);
61884    }
61885
61886    #[simd_test(enable = "avx512f")]
61887    fn test_mm_maskz_fixupimm_sd() {
61888        let a = _mm_set_pd(0., f64::NAN);
61889        let b = _mm_set1_pd(f64::MAX);
61890        let c = _mm_set1_epi64x(i32::MAX as i64);
61891        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
61892        let e = _mm_set_pd(0., 0.0);
61893        assert_eq_m128d(r, e);
61894        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
61895        let e = _mm_set_pd(0., -0.0);
61896        assert_eq_m128d(r, e);
61897    }
61898
61899    #[simd_test(enable = "avx512f")]
61900    fn test_mm_fixupimm_round_ss() {
61901        let a = _mm_set_ps(1., 0., 0., f32::NAN);
61902        let b = _mm_set1_ps(f32::MAX);
61903        let c = _mm_set1_epi32(i32::MAX);
61904        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
61905        let e = _mm_set_ps(1., 0., 0., -0.0);
61906        assert_eq_m128(r, e);
61907    }
61908
61909    #[simd_test(enable = "avx512f")]
61910    fn test_mm_mask_fixupimm_round_ss() {
61911        let a = _mm_set_ps(0., 0., 0., f32::NAN);
61912        let b = _mm_set1_ps(f32::MAX);
61913        let c = _mm_set1_epi32(i32::MAX);
61914        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
61915        let e = _mm_set_ps(0., 0., 0., -0.0);
61916        assert_eq_m128(r, e);
61917    }
61918
61919    #[simd_test(enable = "avx512f")]
61920    fn test_mm_maskz_fixupimm_round_ss() {
61921        let a = _mm_set_ps(0., 0., 0., f32::NAN);
61922        let b = _mm_set1_ps(f32::MAX);
61923        let c = _mm_set1_epi32(i32::MAX);
61924        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
61925        let e = _mm_set_ps(0., 0., 0., 0.0);
61926        assert_eq_m128(r, e);
61927        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
61928        let e = _mm_set_ps(0., 0., 0., -0.0);
61929        assert_eq_m128(r, e);
61930    }
61931
61932    #[simd_test(enable = "avx512f")]
61933    fn test_mm_fixupimm_round_sd() {
61934        let a = _mm_set_pd(0., f64::NAN);
61935        let b = _mm_set1_pd(f64::MAX);
61936        let c = _mm_set1_epi64x(i32::MAX as i64);
61937        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
61938        let e = _mm_set_pd(0., -0.0);
61939        assert_eq_m128d(r, e);
61940    }
61941
61942    #[simd_test(enable = "avx512f")]
61943    fn test_mm_mask_fixupimm_round_sd() {
61944        let a = _mm_set_pd(0., f64::NAN);
61945        let b = _mm_set1_pd(f64::MAX);
61946        let c = _mm_set1_epi64x(i32::MAX as i64);
61947        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
61948        let e = _mm_set_pd(0., -0.0);
61949        assert_eq_m128d(r, e);
61950    }
61951
61952    #[simd_test(enable = "avx512f")]
61953    fn test_mm_maskz_fixupimm_round_sd() {
61954        let a = _mm_set_pd(0., f64::NAN);
61955        let b = _mm_set1_pd(f64::MAX);
61956        let c = _mm_set1_epi64x(i32::MAX as i64);
61957        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
61958        let e = _mm_set_pd(0., 0.0);
61959        assert_eq_m128d(r, e);
61960        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
61961        let e = _mm_set_pd(0., -0.0);
61962        assert_eq_m128d(r, e);
61963    }
61964
61965    #[simd_test(enable = "avx512f")]
61966    fn test_mm_mask_cvtss_sd() {
61967        let a = _mm_set_pd(6., -7.5);
61968        let b = _mm_set_ps(0., -0.5, 1., -1.5);
61969        let r = _mm_mask_cvtss_sd(a, 0, a, b);
61970        assert_eq_m128d(r, a);
61971        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
61972        let e = _mm_set_pd(6., -1.5);
61973        assert_eq_m128d(r, e);
61974    }
61975
61976    #[simd_test(enable = "avx512f")]
61977    fn test_mm_maskz_cvtss_sd() {
61978        let a = _mm_set_pd(6., -7.5);
61979        let b = _mm_set_ps(0., -0.5, 1., -1.5);
61980        let r = _mm_maskz_cvtss_sd(0, a, b);
61981        let e = _mm_set_pd(6., 0.);
61982        assert_eq_m128d(r, e);
61983        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
61984        let e = _mm_set_pd(6., -1.5);
61985        assert_eq_m128d(r, e);
61986    }
61987
61988    #[simd_test(enable = "avx512f")]
61989    fn test_mm_mask_cvtsd_ss() {
61990        let a = _mm_set_ps(0., -0.5, 1., -1.5);
61991        let b = _mm_set_pd(6., -7.5);
61992        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
61993        assert_eq_m128(r, a);
61994        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
61995        let e = _mm_set_ps(0., -0.5, 1., -7.5);
61996        assert_eq_m128(r, e);
61997    }
61998
61999    #[simd_test(enable = "avx512f")]
62000    fn test_mm_maskz_cvtsd_ss() {
62001        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62002        let b = _mm_set_pd(6., -7.5);
62003        let r = _mm_maskz_cvtsd_ss(0, a, b);
62004        let e = _mm_set_ps(0., -0.5, 1., 0.);
62005        assert_eq_m128(r, e);
62006        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
62007        let e = _mm_set_ps(0., -0.5, 1., -7.5);
62008        assert_eq_m128(r, e);
62009    }
62010
62011    #[simd_test(enable = "avx512f")]
62012    fn test_mm_cvt_roundss_sd() {
62013        let a = _mm_set_pd(6., -7.5);
62014        let b = _mm_set_ps(0., -0.5, 1., -1.5);
62015        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
62016        let e = _mm_set_pd(6., -1.5);
62017        assert_eq_m128d(r, e);
62018    }
62019
62020    #[simd_test(enable = "avx512f")]
62021    fn test_mm_mask_cvt_roundss_sd() {
62022        let a = _mm_set_pd(6., -7.5);
62023        let b = _mm_set_ps(0., -0.5, 1., -1.5);
62024        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
62025        assert_eq_m128d(r, a);
62026        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
62027        let e = _mm_set_pd(6., -1.5);
62028        assert_eq_m128d(r, e);
62029    }
62030
62031    #[simd_test(enable = "avx512f")]
62032    fn test_mm_maskz_cvt_roundss_sd() {
62033        let a = _mm_set_pd(6., -7.5);
62034        let b = _mm_set_ps(0., -0.5, 1., -1.5);
62035        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
62036        let e = _mm_set_pd(6., 0.);
62037        assert_eq_m128d(r, e);
62038        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
62039        let e = _mm_set_pd(6., -1.5);
62040        assert_eq_m128d(r, e);
62041    }
62042
62043    #[simd_test(enable = "avx512f")]
62044    fn test_mm_cvt_roundsd_ss() {
62045        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62046        let b = _mm_set_pd(6., -7.5);
62047        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
62048        let e = _mm_set_ps(0., -0.5, 1., -7.5);
62049        assert_eq_m128(r, e);
62050    }
62051
62052    #[simd_test(enable = "avx512f")]
62053    fn test_mm_mask_cvt_roundsd_ss() {
62054        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62055        let b = _mm_set_pd(6., -7.5);
62056        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
62057        assert_eq_m128(r, a);
62058        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
62059            a, 0b11111111, a, b,
62060        );
62061        let e = _mm_set_ps(0., -0.5, 1., -7.5);
62062        assert_eq_m128(r, e);
62063    }
62064
62065    #[simd_test(enable = "avx512f")]
62066    fn test_mm_maskz_cvt_roundsd_ss() {
62067        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62068        let b = _mm_set_pd(6., -7.5);
62069        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
62070        let e = _mm_set_ps(0., -0.5, 1., 0.);
62071        assert_eq_m128(r, e);
62072        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
62073            0b11111111, a, b,
62074        );
62075        let e = _mm_set_ps(0., -0.5, 1., -7.5);
62076        assert_eq_m128(r, e);
62077    }
62078
62079    #[simd_test(enable = "avx512f")]
62080    fn test_mm_cvt_roundss_si32() {
62081        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62082        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
62083        let e: i32 = -1;
62084        assert_eq!(r, e);
62085    }
62086
62087    #[simd_test(enable = "avx512f")]
62088    fn test_mm_cvt_roundss_i32() {
62089        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62090        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
62091        let e: i32 = -1;
62092        assert_eq!(r, e);
62093    }
62094
62095    #[simd_test(enable = "avx512f")]
62096    fn test_mm_cvt_roundss_u32() {
62097        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62098        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
62099        let e: u32 = u32::MAX;
62100        assert_eq!(r, e);
62101    }
62102
62103    #[simd_test(enable = "avx512f")]
62104    fn test_mm_cvtss_i32() {
62105        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62106        let r = _mm_cvtss_i32(a);
62107        let e: i32 = -2;
62108        assert_eq!(r, e);
62109    }
62110
62111    #[simd_test(enable = "avx512f")]
62112    fn test_mm_cvtss_u32() {
62113        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62114        let r = _mm_cvtss_u32(a);
62115        let e: u32 = u32::MAX;
62116        assert_eq!(r, e);
62117    }
62118
62119    #[simd_test(enable = "avx512f")]
62120    fn test_mm_cvt_roundsd_si32() {
62121        let a = _mm_set_pd(1., -1.5);
62122        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
62123        let e: i32 = -1;
62124        assert_eq!(r, e);
62125    }
62126
62127    #[simd_test(enable = "avx512f")]
62128    fn test_mm_cvt_roundsd_i32() {
62129        let a = _mm_set_pd(1., -1.5);
62130        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
62131        let e: i32 = -1;
62132        assert_eq!(r, e);
62133    }
62134
62135    #[simd_test(enable = "avx512f")]
62136    fn test_mm_cvt_roundsd_u32() {
62137        let a = _mm_set_pd(1., -1.5);
62138        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
62139        let e: u32 = u32::MAX;
62140        assert_eq!(r, e);
62141    }
62142
62143    #[simd_test(enable = "avx512f")]
62144    fn test_mm_cvtsd_i32() {
62145        let a = _mm_set_pd(1., -1.5);
62146        let r = _mm_cvtsd_i32(a);
62147        let e: i32 = -2;
62148        assert_eq!(r, e);
62149    }
62150
62151    #[simd_test(enable = "avx512f")]
62152    fn test_mm_cvtsd_u32() {
62153        let a = _mm_set_pd(1., -1.5);
62154        let r = _mm_cvtsd_u32(a);
62155        let e: u32 = u32::MAX;
62156        assert_eq!(r, e);
62157    }
62158
62159    #[simd_test(enable = "avx512f")]
62160    fn test_mm_cvt_roundi32_ss() {
62161        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62162        let b: i32 = 9;
62163        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
62164        let e = _mm_set_ps(0., -0.5, 1., 9.);
62165        assert_eq_m128(r, e);
62166    }
62167
62168    #[simd_test(enable = "avx512f")]
62169    fn test_mm_cvt_roundsi32_ss() {
62170        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62171        let b: i32 = 9;
62172        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
62173        let e = _mm_set_ps(0., -0.5, 1., 9.);
62174        assert_eq_m128(r, e);
62175    }
62176
62177    #[simd_test(enable = "avx512f")]
62178    fn test_mm_cvt_roundu32_ss() {
62179        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62180        let b: u32 = 9;
62181        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
62182        let e = _mm_set_ps(0., -0.5, 1., 9.);
62183        assert_eq_m128(r, e);
62184    }
62185
62186    #[simd_test(enable = "avx512f")]
62187    const fn test_mm_cvti32_ss() {
62188        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62189        let b: i32 = 9;
62190        let r = _mm_cvti32_ss(a, b);
62191        let e = _mm_set_ps(0., -0.5, 1., 9.);
62192        assert_eq_m128(r, e);
62193    }
62194
62195    #[simd_test(enable = "avx512f")]
62196    const fn test_mm_cvti32_sd() {
62197        let a = _mm_set_pd(1., -1.5);
62198        let b: i32 = 9;
62199        let r = _mm_cvti32_sd(a, b);
62200        let e = _mm_set_pd(1., 9.);
62201        assert_eq_m128d(r, e);
62202    }
62203
62204    #[simd_test(enable = "avx512f")]
62205    fn test_mm_cvtt_roundss_si32() {
62206        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62207        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
62208        let e: i32 = -1;
62209        assert_eq!(r, e);
62210    }
62211
62212    #[simd_test(enable = "avx512f")]
62213    fn test_mm_cvtt_roundss_i32() {
62214        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62215        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
62216        let e: i32 = -1;
62217        assert_eq!(r, e);
62218    }
62219
62220    #[simd_test(enable = "avx512f")]
62221    fn test_mm_cvtt_roundss_u32() {
62222        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62223        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
62224        let e: u32 = u32::MAX;
62225        assert_eq!(r, e);
62226    }
62227
62228    #[simd_test(enable = "avx512f")]
62229    fn test_mm_cvttss_i32() {
62230        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62231        let r = _mm_cvttss_i32(a);
62232        let e: i32 = -1;
62233        assert_eq!(r, e);
62234    }
62235
62236    #[simd_test(enable = "avx512f")]
62237    fn test_mm_cvttss_u32() {
62238        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62239        let r = _mm_cvttss_u32(a);
62240        let e: u32 = u32::MAX;
62241        assert_eq!(r, e);
62242    }
62243
62244    #[simd_test(enable = "avx512f")]
62245    fn test_mm_cvtt_roundsd_si32() {
62246        let a = _mm_set_pd(1., -1.5);
62247        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
62248        let e: i32 = -1;
62249        assert_eq!(r, e);
62250    }
62251
62252    #[simd_test(enable = "avx512f")]
62253    fn test_mm_cvtt_roundsd_i32() {
62254        let a = _mm_set_pd(1., -1.5);
62255        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
62256        let e: i32 = -1;
62257        assert_eq!(r, e);
62258    }
62259
62260    #[simd_test(enable = "avx512f")]
62261    fn test_mm_cvtt_roundsd_u32() {
62262        let a = _mm_set_pd(1., -1.5);
62263        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
62264        let e: u32 = u32::MAX;
62265        assert_eq!(r, e);
62266    }
62267
62268    #[simd_test(enable = "avx512f")]
62269    fn test_mm_cvttsd_i32() {
62270        let a = _mm_set_pd(1., -1.5);
62271        let r = _mm_cvttsd_i32(a);
62272        let e: i32 = -1;
62273        assert_eq!(r, e);
62274    }
62275
62276    #[simd_test(enable = "avx512f")]
62277    fn test_mm_cvttsd_u32() {
62278        let a = _mm_set_pd(1., -1.5);
62279        let r = _mm_cvttsd_u32(a);
62280        let e: u32 = u32::MAX;
62281        assert_eq!(r, e);
62282    }
62283
62284    #[simd_test(enable = "avx512f")]
62285    const fn test_mm_cvtu32_ss() {
62286        let a = _mm_set_ps(0., -0.5, 1., -1.5);
62287        let b: u32 = 9;
62288        let r = _mm_cvtu32_ss(a, b);
62289        let e = _mm_set_ps(0., -0.5, 1., 9.);
62290        assert_eq_m128(r, e);
62291    }
62292
62293    #[simd_test(enable = "avx512f")]
62294    const fn test_mm_cvtu32_sd() {
62295        let a = _mm_set_pd(1., -1.5);
62296        let b: u32 = 9;
62297        let r = _mm_cvtu32_sd(a, b);
62298        let e = _mm_set_pd(1., 9.);
62299        assert_eq_m128d(r, e);
62300    }
62301
62302    #[simd_test(enable = "avx512f")]
62303    fn test_mm_comi_round_ss() {
62304        let a = _mm_set1_ps(2.2);
62305        let b = _mm_set1_ps(1.1);
62306        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
62307        let e: i32 = 0;
62308        assert_eq!(r, e);
62309    }
62310
62311    #[simd_test(enable = "avx512f")]
62312    fn test_mm_comi_round_sd() {
62313        let a = _mm_set1_pd(2.2);
62314        let b = _mm_set1_pd(1.1);
62315        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
62316        let e: i32 = 0;
62317        assert_eq!(r, e);
62318    }
62319
62320    #[simd_test(enable = "avx512f")]
62321    const fn test_mm512_cvtsi512_si32() {
62322        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
62323        let r = _mm512_cvtsi512_si32(a);
62324        let e: i32 = 1;
62325        assert_eq!(r, e);
62326    }
62327
62328    #[simd_test(enable = "avx512f")]
62329    const fn test_mm512_cvtss_f32() {
62330        let a = _mm512_setr_ps(
62331            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
62332        );
62333        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
62334    }
62335
62336    #[simd_test(enable = "avx512f")]
62337    const fn test_mm512_cvtsd_f64() {
62338        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
62339        assert_eq!(r, -1.1);
62340    }
62341
62342    #[simd_test(enable = "avx512f")]
62343    const fn test_mm512_shuffle_pd() {
62344        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
62345        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
62346        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
62347        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
62348        assert_eq_m512d(r, e);
62349    }
62350
62351    #[simd_test(enable = "avx512f")]
62352    const fn test_mm512_mask_shuffle_pd() {
62353        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
62354        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
62355        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
62356        assert_eq_m512d(r, a);
62357        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
62358        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
62359        assert_eq_m512d(r, e);
62360    }
62361
62362    #[simd_test(enable = "avx512f")]
62363    const fn test_mm512_maskz_shuffle_pd() {
62364        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
62365        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
62366        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
62367        assert_eq_m512d(r, _mm512_setzero_pd());
62368        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
62369        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
62370        assert_eq_m512d(r, e);
62371    }
62372
62373    #[simd_test(enable = "avx512f")]
62374    fn test_mm512_mask_expandloadu_epi32() {
62375        let src = _mm512_set1_epi32(42);
62376        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
62377        let p = a.as_ptr();
62378        let m = 0b11101000_11001010;
62379        let r = unsafe { _mm512_mask_expandloadu_epi32(src, m, black_box(p)) };
62380        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
62381        assert_eq_m512i(r, e);
62382    }
62383
62384    #[simd_test(enable = "avx512f")]
62385    fn test_mm512_maskz_expandloadu_epi32() {
62386        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
62387        let p = a.as_ptr();
62388        let m = 0b11101000_11001010;
62389        let r = unsafe { _mm512_maskz_expandloadu_epi32(m, black_box(p)) };
62390        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
62391        assert_eq_m512i(r, e);
62392    }
62393
62394    #[simd_test(enable = "avx512f,avx512vl")]
62395    fn test_mm256_mask_expandloadu_epi32() {
62396        let src = _mm256_set1_epi32(42);
62397        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
62398        let p = a.as_ptr();
62399        let m = 0b11101000;
62400        let r = unsafe { _mm256_mask_expandloadu_epi32(src, m, black_box(p)) };
62401        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
62402        assert_eq_m256i(r, e);
62403    }
62404
62405    #[simd_test(enable = "avx512f,avx512vl")]
62406    fn test_mm256_maskz_expandloadu_epi32() {
62407        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
62408        let p = a.as_ptr();
62409        let m = 0b11101000;
62410        let r = unsafe { _mm256_maskz_expandloadu_epi32(m, black_box(p)) };
62411        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
62412        assert_eq_m256i(r, e);
62413    }
62414
62415    #[simd_test(enable = "avx512f,avx512vl")]
62416    fn test_mm_mask_expandloadu_epi32() {
62417        let src = _mm_set1_epi32(42);
62418        let a = &[1_i32, 2, 3, 4];
62419        let p = a.as_ptr();
62420        let m = 0b11111000;
62421        let r = unsafe { _mm_mask_expandloadu_epi32(src, m, black_box(p)) };
62422        let e = _mm_set_epi32(1, 42, 42, 42);
62423        assert_eq_m128i(r, e);
62424    }
62425
62426    #[simd_test(enable = "avx512f,avx512vl")]
62427    fn test_mm_maskz_expandloadu_epi32() {
62428        let a = &[1_i32, 2, 3, 4];
62429        let p = a.as_ptr();
62430        let m = 0b11111000;
62431        let r = unsafe { _mm_maskz_expandloadu_epi32(m, black_box(p)) };
62432        let e = _mm_set_epi32(1, 0, 0, 0);
62433        assert_eq_m128i(r, e);
62434    }
62435
62436    #[simd_test(enable = "avx512f")]
62437    fn test_mm512_mask_expandloadu_epi64() {
62438        let src = _mm512_set1_epi64(42);
62439        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
62440        let p = a.as_ptr();
62441        let m = 0b11101000;
62442        let r = unsafe { _mm512_mask_expandloadu_epi64(src, m, black_box(p)) };
62443        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
62444        assert_eq_m512i(r, e);
62445    }
62446
62447    #[simd_test(enable = "avx512f")]
62448    fn test_mm512_maskz_expandloadu_epi64() {
62449        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
62450        let p = a.as_ptr();
62451        let m = 0b11101000;
62452        let r = unsafe { _mm512_maskz_expandloadu_epi64(m, black_box(p)) };
62453        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
62454        assert_eq_m512i(r, e);
62455    }
62456
62457    #[simd_test(enable = "avx512f,avx512vl")]
62458    fn test_mm256_mask_expandloadu_epi64() {
62459        let src = _mm256_set1_epi64x(42);
62460        let a = &[1_i64, 2, 3, 4];
62461        let p = a.as_ptr();
62462        let m = 0b11101000;
62463        let r = unsafe { _mm256_mask_expandloadu_epi64(src, m, black_box(p)) };
62464        let e = _mm256_set_epi64x(1, 42, 42, 42);
62465        assert_eq_m256i(r, e);
62466    }
62467
62468    #[simd_test(enable = "avx512f,avx512vl")]
62469    fn test_mm256_maskz_expandloadu_epi64() {
62470        let a = &[1_i64, 2, 3, 4];
62471        let p = a.as_ptr();
62472        let m = 0b11101000;
62473        let r = unsafe { _mm256_maskz_expandloadu_epi64(m, black_box(p)) };
62474        let e = _mm256_set_epi64x(1, 0, 0, 0);
62475        assert_eq_m256i(r, e);
62476    }
62477
62478    #[simd_test(enable = "avx512f,avx512vl")]
62479    fn test_mm_mask_expandloadu_epi64() {
62480        let src = _mm_set1_epi64x(42);
62481        let a = &[1_i64, 2];
62482        let p = a.as_ptr();
62483        let m = 0b11101000;
62484        let r = unsafe { _mm_mask_expandloadu_epi64(src, m, black_box(p)) };
62485        let e = _mm_set_epi64x(42, 42);
62486        assert_eq_m128i(r, e);
62487    }
62488
62489    #[simd_test(enable = "avx512f,avx512vl")]
62490    fn test_mm_maskz_expandloadu_epi64() {
62491        let a = &[1_i64, 2];
62492        let p = a.as_ptr();
62493        let m = 0b11101000;
62494        let r = unsafe { _mm_maskz_expandloadu_epi64(m, black_box(p)) };
62495        let e = _mm_set_epi64x(0, 0);
62496        assert_eq_m128i(r, e);
62497    }
62498
62499    #[simd_test(enable = "avx512f")]
62500    fn test_mm512_mask_expandloadu_ps() {
62501        let src = _mm512_set1_ps(42.);
62502        let a = &[
62503            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
62504        ];
62505        let p = a.as_ptr();
62506        let m = 0b11101000_11001010;
62507        let r = unsafe { _mm512_mask_expandloadu_ps(src, m, black_box(p)) };
62508        let e = _mm512_set_ps(
62509            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
62510        );
62511        assert_eq_m512(r, e);
62512    }
62513
62514    #[simd_test(enable = "avx512f")]
62515    fn test_mm512_maskz_expandloadu_ps() {
62516        let a = &[
62517            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
62518        ];
62519        let p = a.as_ptr();
62520        let m = 0b11101000_11001010;
62521        let r = unsafe { _mm512_maskz_expandloadu_ps(m, black_box(p)) };
62522        let e = _mm512_set_ps(
62523            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
62524        );
62525        assert_eq_m512(r, e);
62526    }
62527
62528    #[simd_test(enable = "avx512f,avx512vl")]
62529    fn test_mm256_mask_expandloadu_ps() {
62530        let src = _mm256_set1_ps(42.);
62531        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
62532        let p = a.as_ptr();
62533        let m = 0b11101000;
62534        let r = unsafe { _mm256_mask_expandloadu_ps(src, m, black_box(p)) };
62535        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
62536        assert_eq_m256(r, e);
62537    }
62538
62539    #[simd_test(enable = "avx512f,avx512vl")]
62540    fn test_mm256_maskz_expandloadu_ps() {
62541        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
62542        let p = a.as_ptr();
62543        let m = 0b11101000;
62544        let r = unsafe { _mm256_maskz_expandloadu_ps(m, black_box(p)) };
62545        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
62546        assert_eq_m256(r, e);
62547    }
62548
62549    #[simd_test(enable = "avx512f,avx512vl")]
62550    fn test_mm_mask_expandloadu_ps() {
62551        let src = _mm_set1_ps(42.);
62552        let a = &[1.0f32, 2., 3., 4.];
62553        let p = a.as_ptr();
62554        let m = 0b11101000;
62555        let r = unsafe { _mm_mask_expandloadu_ps(src, m, black_box(p)) };
62556        let e = _mm_set_ps(1., 42., 42., 42.);
62557        assert_eq_m128(r, e);
62558    }
62559
62560    #[simd_test(enable = "avx512f,avx512vl")]
62561    fn test_mm_maskz_expandloadu_ps() {
62562        let a = &[1.0f32, 2., 3., 4.];
62563        let p = a.as_ptr();
62564        let m = 0b11101000;
62565        let r = unsafe { _mm_maskz_expandloadu_ps(m, black_box(p)) };
62566        let e = _mm_set_ps(1., 0., 0., 0.);
62567        assert_eq_m128(r, e);
62568    }
62569
62570    #[simd_test(enable = "avx512f")]
62571    fn test_mm512_mask_expandloadu_pd() {
62572        let src = _mm512_set1_pd(42.);
62573        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
62574        let p = a.as_ptr();
62575        let m = 0b11101000;
62576        let r = unsafe { _mm512_mask_expandloadu_pd(src, m, black_box(p)) };
62577        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
62578        assert_eq_m512d(r, e);
62579    }
62580
62581    #[simd_test(enable = "avx512f")]
62582    fn test_mm512_maskz_expandloadu_pd() {
62583        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
62584        let p = a.as_ptr();
62585        let m = 0b11101000;
62586        let r = unsafe { _mm512_maskz_expandloadu_pd(m, black_box(p)) };
62587        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
62588        assert_eq_m512d(r, e);
62589    }
62590
62591    #[simd_test(enable = "avx512f,avx512vl")]
62592    fn test_mm256_mask_expandloadu_pd() {
62593        let src = _mm256_set1_pd(42.);
62594        let a = &[1.0f64, 2., 3., 4.];
62595        let p = a.as_ptr();
62596        let m = 0b11101000;
62597        let r = unsafe { _mm256_mask_expandloadu_pd(src, m, black_box(p)) };
62598        let e = _mm256_set_pd(1., 42., 42., 42.);
62599        assert_eq_m256d(r, e);
62600    }
62601
62602    #[simd_test(enable = "avx512f,avx512vl")]
62603    fn test_mm256_maskz_expandloadu_pd() {
62604        let a = &[1.0f64, 2., 3., 4.];
62605        let p = a.as_ptr();
62606        let m = 0b11101000;
62607        let r = unsafe { _mm256_maskz_expandloadu_pd(m, black_box(p)) };
62608        let e = _mm256_set_pd(1., 0., 0., 0.);
62609        assert_eq_m256d(r, e);
62610    }
62611
62612    #[simd_test(enable = "avx512f,avx512vl")]
62613    fn test_mm_mask_expandloadu_pd() {
62614        let src = _mm_set1_pd(42.);
62615        let a = &[1.0f64, 2.];
62616        let p = a.as_ptr();
62617        let m = 0b11101000;
62618        let r = unsafe { _mm_mask_expandloadu_pd(src, m, black_box(p)) };
62619        let e = _mm_set_pd(42., 42.);
62620        assert_eq_m128d(r, e);
62621    }
62622
62623    #[simd_test(enable = "avx512f,avx512vl")]
62624    fn test_mm_maskz_expandloadu_pd() {
62625        let a = &[1.0f64, 2.];
62626        let p = a.as_ptr();
62627        let m = 0b11101000;
62628        let r = unsafe { _mm_maskz_expandloadu_pd(m, black_box(p)) };
62629        let e = _mm_set_pd(0., 0.);
62630        assert_eq_m128d(r, e);
62631    }
62632}