diff --git a/simde/arm/neon/qshlu_n.h b/simde/arm/neon/qshlu_n.h index 587fc439c..43b0b3ee5 100644 --- a/simde/arm/neon/qshlu_n.h +++ b/simde/arm/neon/qshlu_n.h @@ -94,8 +94,9 @@ SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vqshlud_n_s64(int64_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { - uint32_t r = HEDLEY_STATIC_CAST(uint32_t, a << n); - r |= (((r >> n) != HEDLEY_STATIC_CAST(uint32_t, a)) ? UINT32_MAX : 0); + /* Cast before shifting: a << n on a signed int64_t is undefined when a is negative or bits reach the sign bit; the unsigned shift simply discards them and the check below saturates. */ + uint64_t r = HEDLEY_STATIC_CAST(uint64_t, a) << n; + r |= (((r >> n) != HEDLEY_STATIC_CAST(uint64_t, a)) ? UINT64_MAX : 0); return (a < 0) ? 0 : r; } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) diff --git a/test/arm/neon/qshlu_n.c b/test/arm/neon/qshlu_n.c index a02c3d3cd..21cf7c999 100644 --- a/test/arm/neon/qshlu_n.c +++ b/test/arm/neon/qshlu_n.c @@ -8,84 +8,305 @@ SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vqshluh_n_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { - int16_t a[1]; - uint16_t r0[1]; - uint16_t r2[1]; - uint16_t r5[1]; - uint16_t r9[1]; - uint16_t r10[1]; - uint16_t r13[1]; - uint16_t r15[1]; - } test_vec[] = { - { { INT16_C( 10390) }, - { UINT16_C(10390) }, - { UINT16_C(41560) }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX } }, - { { INT16_C( 23503) }, - { UINT16_C(23503) }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX } }, - { { INT16_C( 15091) }, - { UINT16_C(15091) }, - { UINT16_C(60364) }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX } }, - { { -INT16_C( 28563) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) } }, - { { -INT16_C( 24923) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) } }, - { { -INT16_C( 12223) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { 
UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) } }, - { { -INT16_C( 12133) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) }, - { UINT16_C( 0) } }, - { { INT16_C( 31833) }, - { UINT16_C(31833) }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX }, - { UINT16_MAX } }, - }; + int16_t a; + uint16_t r0; + uint16_t r2; + uint16_t r5; + uint16_t r9; + uint16_t r10; + uint16_t r13; + uint16_t r15; + } test_vec[] = { + { -INT16_C( 1108), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 115), + UINT16_C( 115), + UINT16_C( 460), + UINT16_C( 3680), + UINT16_C(58880), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 3529), + UINT16_C( 3529), + UINT16_C(14116), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 144), + UINT16_C( 144), + UINT16_C( 576), + UINT16_C( 4608), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 28545), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 26435), + UINT16_C(26435), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 22897), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 29), + UINT16_C( 29), + UINT16_C( 116), + UINT16_C( 928), + UINT16_C(14848), + UINT16_C(29696), + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 6552), + UINT16_C( 6552), + UINT16_C(26208), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 27151), + UINT16_C(27151), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 13307), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + 
UINT16_C( 0) }, + { INT16_C( 253), + UINT16_C( 253), + UINT16_C( 1012), + UINT16_C( 8096), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 8965), + UINT16_C( 8965), + UINT16_C(35860), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 27187), + UINT16_C(27187), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 12609), + UINT16_C(12609), + UINT16_C(50436), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 89), + UINT16_C( 89), + UINT16_C( 356), + UINT16_C( 2848), + UINT16_C(45568), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 13356), + UINT16_C(13356), + UINT16_C(53424), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 115), + UINT16_C( 115), + UINT16_C( 460), + UINT16_C( 3680), + UINT16_C(58880), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 447), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 119), + UINT16_C( 119), + UINT16_C( 476), + UINT16_C( 3808), + UINT16_C(60928), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 13170), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 7463), + UINT16_C( 7463), + UINT16_C(29852), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 2930), + UINT16_C( 2930), + UINT16_C(11720), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 3011), + UINT16_C( 3011), + UINT16_C(12044), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 11740), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 10613), + UINT16_C(10613), + UINT16_C(42452), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX 
}, + { INT16_C( 30878), + UINT16_C(30878), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { INT16_C( 69), + UINT16_C( 69), + UINT16_C( 276), + UINT16_C( 2208), + UINT16_C(35328), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 4453), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 242), + UINT16_C( 242), + UINT16_C( 968), + UINT16_C( 7744), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + { -INT16_C( 18913), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0), + UINT16_C( 0) }, + { INT16_C( 19402), + UINT16_C(19402), + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX, + UINT16_MAX }, + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - int16_t a = test_vec[i].a[0]; + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t a = test_vec[i].a; + uint16_t r0 = simde_vqshluh_n_s16(a, 0); + uint16_t r2 = simde_vqshluh_n_s16(a, 2); + uint16_t r5 = simde_vqshluh_n_s16(a, 5); + uint16_t r9 = simde_vqshluh_n_s16(a, 9); + uint16_t r10 = simde_vqshluh_n_s16(a, 10); + uint16_t r13 = simde_vqshluh_n_s16(a, 13); + uint16_t r15 = simde_vqshluh_n_s16(a, 15); + + simde_assert_equal_u16(r0, test_vec[i].r0); + simde_assert_equal_u16(r2, test_vec[i].r2); + simde_assert_equal_u16(r5, test_vec[i].r5); + simde_assert_equal_u16(r9, test_vec[i].r9); + simde_assert_equal_u16(r10, test_vec[i].r10); + simde_assert_equal_u16(r13, test_vec[i].r13); + simde_assert_equal_u16(r15, test_vec[i].r15); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 32 ; i++) { + int16_t a = simde_test_codegen_random_i16(); + // Ensure some reasonable number of valid test vectors. 
+ if ((i % 2) && (a < 0)) + { + a = -a; + a %= 256; + } uint16_t r0 = simde_vqshluh_n_s16(a, 0); uint16_t r2 = simde_vqshluh_n_s16(a, 2); uint16_t r5 = simde_vqshluh_n_s16(a, 5); @@ -94,16 +315,917 @@ test_simde_vqshluh_n_s16 (SIMDE_MUNIT_TEST_ARGS) { uint16_t r13 = simde_vqshluh_n_s16(a, 13); uint16_t r15 = simde_vqshluh_n_s16(a, 15); - simde_assert_equal_u16(r0, test_vec[i].r0[0]); - simde_assert_equal_u16(r2, test_vec[i].r2[0]); - simde_assert_equal_u16(r5, test_vec[i].r5[0]); - simde_assert_equal_u16(r9, test_vec[i].r9[0]); - simde_assert_equal_u16(r10, test_vec[i].r10[0]); - simde_assert_equal_u16(r13, test_vec[i].r13[0]); - simde_assert_equal_u16(r15, test_vec[i].r15[0]); + simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_u16(2, r0, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r9, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r15, SIMDE_TEST_VEC_POS_LAST); } + return 1; +#endif +} - return 0; +static int +test_simde_vqshlus_n_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a; + uint32_t r0; + uint32_t r2; + uint32_t r5; + uint32_t r9; + uint32_t r16; + uint32_t r24; + uint32_t r31; + } test_vec[] = { + { -INT32_C( 860251280), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 1938561109), + UINT32_C(1938561109), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 570684325), + UINT32_C( 570684325), + UINT32_C(2282737300), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 1438355717), + UINT32_C(1438355717), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, 
+ UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { -INT32_C( 2110861755), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 220741274), + UINT32_C( 220741274), + UINT32_C( 882965096), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { -INT32_C( 1436645693), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 166), + UINT32_C( 166), + UINT32_C( 664), + UINT32_C( 5312), + UINT32_C( 84992), + UINT32_C( 10878976), + UINT32_C(2785017856), + UINT32_MAX }, + { INT32_C( 949453539), + UINT32_C( 949453539), + UINT32_C(3797814156), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 1084957595), + UINT32_C(1084957595), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 459452182), + UINT32_C( 459452182), + UINT32_C(1837808728), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 192), + UINT32_C( 192), + UINT32_C( 768), + UINT32_C( 6144), + UINT32_C( 98304), + UINT32_C( 12582912), + UINT32_C(3221225472), + UINT32_MAX }, + { -INT32_C( 2029478164), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 35), + UINT32_C( 35), + UINT32_C( 140), + UINT32_C( 1120), + UINT32_C( 17920), + UINT32_C( 2293760), + UINT32_C( 587202560), + UINT32_MAX }, + { INT32_C( 273347254), + UINT32_C( 273347254), + UINT32_C(1093389016), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 500896570), + UINT32_C( 500896570), + UINT32_C(2003586280), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 810906261), + UINT32_C( 810906261), + UINT32_C(3243625044), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 107), + UINT32_C( 107), + UINT32_C( 428), + 
UINT32_C( 3424), + UINT32_C( 54784), + UINT32_C( 7012352), + UINT32_C(1795162112), + UINT32_MAX }, + { -INT32_C( 255339600), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 15), + UINT32_C( 15), + UINT32_C( 60), + UINT32_C( 480), + UINT32_C( 7680), + UINT32_C( 983040), + UINT32_C( 251658240), + UINT32_MAX }, + { -INT32_C( 1285194026), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 1683225006), + UINT32_C(1683225006), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 645242347), + UINT32_C( 645242347), + UINT32_C(2580969388), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 1162039472), + UINT32_C(1162039472), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 1484167619), + UINT32_C(1484167619), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 1241835418), + UINT32_C(1241835418), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { -INT32_C( 1405432902), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 254), + UINT32_C( 254), + UINT32_C( 1016), + UINT32_C( 8128), + UINT32_C( 130048), + UINT32_C( 16646144), + UINT32_C(4261412864), + UINT32_MAX }, + { -INT32_C( 561254609), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0), + UINT32_C( 0) }, + { INT32_C( 24), + UINT32_C( 24), + UINT32_C( 96), + UINT32_C( 768), + UINT32_C( 12288), + UINT32_C( 1572864), + UINT32_C( 402653184), + UINT32_MAX }, + { INT32_C( 737785723), + UINT32_C( 737785723), + UINT32_C(2951142892), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { INT32_C( 248), + UINT32_C( 248), + 
UINT32_C( 992), + UINT32_C( 7936), + UINT32_C( 126976), + UINT32_C( 16252928), + UINT32_C(4160749568), + UINT32_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t a = test_vec[i].a; + uint32_t r0 = simde_vqshlus_n_s32(a, 0); + uint32_t r2 = simde_vqshlus_n_s32(a, 2); + uint32_t r5 = simde_vqshlus_n_s32(a, 5); + uint32_t r9 = simde_vqshlus_n_s32(a, 9); + uint32_t r16 = simde_vqshlus_n_s32(a, 16); + uint32_t r24 = simde_vqshlus_n_s32(a, 24); + uint32_t r31 = simde_vqshlus_n_s32(a, 31); + + simde_assert_equal_u32(r0, test_vec[i].r0); + simde_assert_equal_u32(r2, test_vec[i].r2); + simde_assert_equal_u32(r5, test_vec[i].r5); + simde_assert_equal_u32(r9, test_vec[i].r9); + simde_assert_equal_u32(r16, test_vec[i].r16); + simde_assert_equal_u32(r24, test_vec[i].r24); + simde_assert_equal_u32(r31, test_vec[i].r31); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 32 ; i++) { + int32_t a = simde_test_codegen_random_i32(); + // Ensure some reasonable number of valid test vectors. 
+ if ((i % 2) && (a < 0)) + { + a = -a; + a %= 256; + } + uint32_t r0 = simde_vqshlus_n_s32(a, 0); + uint32_t r2 = simde_vqshlus_n_s32(a, 2); + uint32_t r5 = simde_vqshlus_n_s32(a, 5); + uint32_t r9 = simde_vqshlus_n_s32(a, 9); + uint32_t r16 = simde_vqshlus_n_s32(a, 16); + uint32_t r24 = simde_vqshlus_n_s32(a, 24); + uint32_t r31 = simde_vqshlus_n_s32(a, 31); + + simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_u32(2, r0, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r9, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r24, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r31, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqshlud_n_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int64_t a; + uint64_t r0; + uint64_t r2; + uint64_t r5; + uint64_t r9; + uint64_t r32; + uint64_t r48; + uint64_t r63; + } test_vec[] = { + { INT64_C( 2886399765623690139), + UINT64_C( 2886399765623690139), + UINT64_C(11545599062494760556), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 28), + UINT64_C( 28), + UINT64_C( 112), + UINT64_C( 896), + UINT64_C( 14336), + UINT64_C( 120259084288), + UINT64_C( 7881299347898368), + UINT64_MAX }, + { INT64_C( 2703632360428737343), + UINT64_C( 2703632360428737343), + UINT64_C(10814529441714949372), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 98), + UINT64_C( 98), + UINT64_C( 392), + UINT64_C( 3136), + UINT64_C( 50176), + UINT64_C( 420906795008), + UINT64_C( 27584547717644288), + UINT64_MAX }, + { INT64_C( 3086723063588304994), + UINT64_C( 3086723063588304994), + UINT64_C(12346892254353219976), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + 
UINT64_MAX }, + { INT64_C( 2248181184994964008), + UINT64_C( 2248181184994964008), + UINT64_C( 8992724739979856032), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 6364903447505023433), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 1857972031008909455), + UINT64_C( 1857972031008909455), + UINT64_C( 7431888124035637820), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 6773304119622301583), + UINT64_C( 6773304119622301583), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 242), + UINT64_C( 242), + UINT64_C( 968), + UINT64_C( 7744), + UINT64_C( 123904), + UINT64_C( 1039382085632), + UINT64_C( 68116944363978752), + UINT64_MAX }, + { -INT64_C( 4798055237531738951), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 118), + UINT64_C( 118), + UINT64_C( 472), + UINT64_C( 3776), + UINT64_C( 60416), + UINT64_C( 506806140928), + UINT64_C( 33214047251857408), + UINT64_MAX }, + { -INT64_C( 3751909992286248869), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 5335298334686619359), + UINT64_C( 5335298334686619359), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 5939837636799872603), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 61), + UINT64_C( 61), + UINT64_C( 244), + UINT64_C( 1952), + UINT64_C( 31232), + UINT64_C( 261993005056), + UINT64_C( 17169973579350016), + UINT64_MAX }, + { -INT64_C( 8074556451617822871), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 1939630276072745999), + UINT64_C( 1939630276072745999), 
+ UINT64_C( 7758521104290983996), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 3316195700146828852), + UINT64_C( 3316195700146828852), + UINT64_C(13264782800587315408), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 85), + UINT64_C( 85), + UINT64_C( 340), + UINT64_C( 2720), + UINT64_C( 43520), + UINT64_C( 365072220160), + UINT64_C( 23925373020405760), + UINT64_MAX }, + { INT64_C( 1881586601638470108), + UINT64_C( 1881586601638470108), + UINT64_C( 7526346406553880432), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 3240522415905375337), + UINT64_C( 3240522415905375337), + UINT64_C(12962089663621501348), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 1096214622027179420), + UINT64_C( 1096214622027179420), + UINT64_C( 4384858488108717680), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 131), + UINT64_C( 131), + UINT64_C( 524), + UINT64_C( 4192), + UINT64_C( 67072), + UINT64_C( 562640715776), + UINT64_C( 36873221949095936), + UINT64_MAX }, + { INT64_C( 5852294572258503441), + UINT64_C( 5852294572258503441), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 3723957473559704202), + UINT64_C( 3723957473559704202), + UINT64_C(14895829894238816808), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 4143155232059896332), + UINT64_C( 4143155232059896332), + UINT64_C(16572620928239585328), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 5448601532175206846), + UINT64_C( 5448601532175206846), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 1708727438762510205), + UINT64_C( 1708727438762510205), + UINT64_C( 6834909755050040820), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 
1827132191375222160), + UINT64_C( 1827132191375222160), + UINT64_C( 7308528765500888640), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 4220425874231945607), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 3555348583236854226), + UINT64_C( 3555348583236854226), + UINT64_C(14221394332947416904), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 1311306851604316631), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 2907198552664270642), + UINT64_C( 2907198552664270642), + UINT64_C(11628794210657082568), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 4767961899337916153), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 212), + UINT64_C( 212), + UINT64_C( 848), + UINT64_C( 6784), + UINT64_C( 108544), + UINT64_C( 910533066752), + UINT64_C( 59672695062659072), + UINT64_MAX }, + { INT64_C( 2749774076919326686), + UINT64_C( 2749774076919326686), + UINT64_C(10999096307677306744), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 113), + UINT64_C( 113), + UINT64_C( 452), + UINT64_C( 3616), + UINT64_C( 57856), + UINT64_C( 485331304448), + UINT64_C( 31806672368304128), + UINT64_MAX }, + { -INT64_C( 5096215995630566753), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 1014621837266063348), + UINT64_C( 1014621837266063348), + UINT64_C( 4058487349064253392), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 2696374791627697744), + UINT64_C( 2696374791627697744), + UINT64_C(10785499166510790976), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 
3249286169999167665), + UINT64_C( 3249286169999167665), + UINT64_C(12997144679996670660), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 7841608593457995147), + UINT64_C( 7841608593457995147), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 7298909016306707212), + UINT64_C( 7298909016306707212), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 5252863995267148962), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 27), + UINT64_C( 27), + UINT64_C( 108), + UINT64_C( 864), + UINT64_C( 13824), + UINT64_C( 115964116992), + UINT64_C( 7599824371187712), + UINT64_MAX }, + { INT64_C( 6985288989438203404), + UINT64_C( 6985288989438203404), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 1846448064061819942), + UINT64_C( 1846448064061819942), + UINT64_C( 7385792256247279768), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 4771045237274331513), + UINT64_C( 4771045237274331513), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 91), + UINT64_C( 91), + UINT64_C( 364), + UINT64_C( 2912), + UINT64_C( 46592), + UINT64_C( 390842023936), + UINT64_C( 25614222880669696), + UINT64_MAX }, + { -INT64_C( 8836064255623125844), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 6602101000981445578), + UINT64_C( 6602101000981445578), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 8811312277574299048), + UINT64_C( 8811312277574299048), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 161), + UINT64_C( 161), + UINT64_C( 644), + UINT64_C( 5152), + UINT64_C( 82432), + 
UINT64_C( 691489734656), + UINT64_C( 45317471250415616), + UINT64_MAX }, + { INT64_C( 4370847282686227396), + UINT64_C( 4370847282686227396), + UINT64_C(17483389130744909584), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 7968665194224314201), + UINT64_C( 7968665194224314201), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 2278895620339765233), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 71), + UINT64_C( 71), + UINT64_C( 284), + UINT64_C( 2272), + UINT64_C( 36352), + UINT64_C( 304942678016), + UINT64_C( 19984723346456576), + UINT64_MAX }, + { -INT64_C( 4520361235328595307), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 8651575862270625499), + UINT64_C( 8651575862270625499), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { -INT64_C( 475767200333069309), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0), + UINT64_C( 0) }, + { INT64_C( 127), + UINT64_C( 127), + UINT64_C( 508), + UINT64_C( 4064), + UINT64_C( 65024), + UINT64_C( 545460846592), + UINT64_C( 35747322042253312), + UINT64_MAX }, + { INT64_C( 2901765450253785371), + UINT64_C( 2901765450253785371), + UINT64_C(11607061801015141484), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { INT64_C( 213), + UINT64_C( 213), + UINT64_C( 852), + UINT64_C( 6816), + UINT64_C( 109056), + UINT64_C( 914828034048), + UINT64_C( 59954170039369728), + UINT64_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t a = test_vec[i].a; + uint64_t r0 = simde_vqshlud_n_s64(a, 0); + uint64_t r2 = simde_vqshlud_n_s64(a, 2); + uint64_t r5 = simde_vqshlud_n_s64(a, 5); + uint64_t r9 = simde_vqshlud_n_s64(a, 9); + uint64_t r32 = 
simde_vqshlud_n_s64(a, 32); + uint64_t r48 = simde_vqshlud_n_s64(a, 48); + uint64_t r63 = simde_vqshlud_n_s64(a, 63); + + simde_assert_equal_u64(r0, test_vec[i].r0); + simde_assert_equal_u64(r2, test_vec[i].r2); + simde_assert_equal_u64(r5, test_vec[i].r5); + simde_assert_equal_u64(r9, test_vec[i].r9); + simde_assert_equal_u64(r32, test_vec[i].r32); + simde_assert_equal_u64(r48, test_vec[i].r48); + simde_assert_equal_u64(r63, test_vec[i].r63); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 64 ; i++) { + int64_t a = simde_test_codegen_random_i64(); + // Ensure some reasonable number of valid test vectors. + if ((i % 2) && (a < 0)) + { + a = -a; + a %= 256; + } + uint64_t r0 = simde_vqshlud_n_s64(a, 0); + uint64_t r2 = simde_vqshlud_n_s64(a, 2); + uint64_t r5 = simde_vqshlud_n_s64(a, 5); + uint64_t r9 = simde_vqshlud_n_s64(a, 9); + uint64_t r32 = simde_vqshlud_n_s64(a, 32); + uint64_t r48 = simde_vqshlud_n_s64(a, 48); + uint64_t r63 = simde_vqshlud_n_s64(a, 63); + + simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_u64(2, r0, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r9, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r32, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r48, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r63, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int @@ -986,6 +2108,8 @@ test_simde_vqshluq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqshluh_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlus_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlud_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlu_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlu_n_s16)