diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 4f9b2c4f0184b..fed1d2f76d0fe 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7430,7 +7430,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase, unless we have AVX512F + // into a helper call by either front-end or lowering phase, unless we have AVX512F/AVX10.2 // accelerated conversions. assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || compiler->canUseEvexEncodingDebugOnly()); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ddec8af5e753f..fde303a40aecb 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -12316,6 +12316,14 @@ void emitter::emitDispIns( case INS_vcvttsd2usi64: case INS_vcvttss2usi32: case INS_vcvttss2usi64: + case INS_vcvttsd2sis32: + case INS_vcvttsd2sis64: + case INS_vcvttss2sis32: + case INS_vcvttss2sis64: + case INS_vcvttsd2usis32: + case INS_vcvttsd2usis64: + case INS_vcvttss2usis32: + case INS_vcvttss2usis64: { assert(!id->idIsEvexAaaContextSet()); printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 06854fa563b59..8a6987d2e7267 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21630,7 +21630,39 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, GenTree* fixupVal; bool isV512Supported = false; - if (compIsEvexOpportunisticallySupported(isV512Supported)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) + { + NamedIntrinsic cvtIntrinsic = NI_Illegal; + switch (simdTargetBaseType) + { + case TYP_INT: + cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorInt32WithTruncationSaturation + : NI_AVX10v2_ConvertToVectorInt32WithTruncationSaturation; + break; + + case TYP_UINT: + cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorUInt32WithTruncationSaturation + : NI_AVX10v2_ConvertToVectorUInt32WithTruncationSaturation; + break; + + case TYP_LONG: + cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorInt64WithTruncationSaturation + : NI_AVX10v2_ConvertToVectorInt64WithTruncationSaturation; + break; + + case TYP_ULONG: + cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorUInt64WithTruncationSaturation + : NI_AVX10v2_ConvertToVectorUInt64WithTruncationSaturation; + break; + + default: + { + unreached(); + } + } + return gtNewSimdHWIntrinsicNode(type, op1, cvtIntrinsic, simdSourceBaseJitType, simdSize); + } + else if (compIsEvexOpportunisticallySupported(isV512Supported)) { /*Generate the control table for VFIXUPIMMSD/SS - For conversion to unsigned diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 775cbd6923781..feac072504194 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2463,42 +2463,88 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) break; case TYP_FLOAT: - switch (to) + if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { - case TYP_INT: - return INS_cvttss2si32; - case TYP_LONG: - return INS_cvttss2si64; - case TYP_FLOAT: - return ins_Move_Extend(TYP_FLOAT, false); - case TYP_DOUBLE: - return INS_cvtss2sd; - case TYP_ULONG: - return INS_vcvttss2usi64; - case TYP_UINT: - return INS_vcvttss2usi32; - default: - unreached(); + switch (to) + { + case TYP_INT: + return INS_vcvttss2sis32; + case TYP_LONG: + return INS_vcvttss2sis64; + case TYP_FLOAT: + return ins_Move_Extend(TYP_FLOAT, false); + case TYP_DOUBLE: + return INS_cvtss2sd; + case TYP_ULONG: + return INS_vcvttss2usis64; + case TYP_UINT: + return INS_vcvttss2usis32; + default: + unreached(); + } + } + else + { + switch (to) + { + case TYP_INT: + return INS_cvttss2si32; + case TYP_LONG: + return INS_cvttss2si64; + case TYP_FLOAT: + return ins_Move_Extend(TYP_FLOAT, false); + case TYP_DOUBLE: + return INS_cvtss2sd; + case TYP_ULONG: + return INS_vcvttss2usi64; + case TYP_UINT: + return INS_vcvttss2usi32; + default: + unreached(); + } } break; case TYP_DOUBLE: - switch (to) + if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { - case TYP_INT: - return INS_cvttsd2si32; - case TYP_LONG: - return INS_cvttsd2si64; - case TYP_FLOAT: - return INS_cvtsd2ss; - case TYP_DOUBLE: - return ins_Move_Extend(TYP_DOUBLE, false); - case TYP_ULONG: - return INS_vcvttsd2usi64; - case TYP_UINT: - return INS_vcvttsd2usi32; - default: - unreached(); + switch (to) + { + case TYP_INT: + return INS_vcvttsd2sis32; + case TYP_LONG: + return INS_vcvttsd2sis64; + case TYP_FLOAT: + return INS_cvtsd2ss; + case TYP_DOUBLE: + return ins_Move_Extend(TYP_DOUBLE, false); + case TYP_ULONG: + return INS_vcvttsd2usis64; + case TYP_UINT: + return INS_vcvttsd2usis32; + default: + unreached(); + } + } + else + { + switch (to) + { + case TYP_INT: + return INS_cvttsd2si32; + case TYP_LONG: + return INS_cvttsd2si64; + case TYP_FLOAT: + return INS_cvtsd2ss; + case TYP_DOUBLE: + return ins_Move_Extend(TYP_DOUBLE, false); + case TYP_ULONG: + return INS_vcvttsd2usi64; + case TYP_UINT: + return INS_vcvttsd2usi32; + default: + unreached(); + } } break; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index bf0f463fdea76..2175c856ecb70 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -869,7 +869,9 @@ GenTree* Lowering::LowerCast(GenTree* tree) #if defined(TARGET_AMD64) // Handle saturation logic for X64 - if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && !varTypeIsSmall(dstType)) + // Let InstructionSet_AVX10v2 pass through since it can handle the saturation + if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && !varTypeIsSmall(dstType) && + !comp->compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { // We should have filtered out float -> long conversion and // converted it to float -> double -> long conversion. @@ -886,10 +888,8 @@ GenTree* Lowering::LowerCast(GenTree* tree) bool isV512Supported = false; /*The code below is to introduce saturating conversions on X86/X64. The C# equivalence of the code is given below --> - // Replace QNaN and SNaN with Zero op1 = Avx512F.Fixup(op1, op1, Vector128.Create(0x88), 0); - // Convert from double to long, replacing any values that were greater than or equal to MaxValue with MaxValue // Values that were less than or equal to MinValue will already be MinValue diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a7071e4ec4de9..0fc0209eb34b6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -337,8 +337,9 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) // dstType = int for SSE41 // For pre-SSE41, the all src is converted to TYP_DOUBLE // and goes through helpers. - && (tree->gtOverflow() || (dstType == TYP_LONG) || - !(canUseEvexEncoding() || (dstType == TYP_INT && compOpportunisticallyDependsOn(InstructionSet_SSE41)))) + && + (tree->gtOverflow() || (dstType == TYP_LONG && !compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) || + !(canUseEvexEncoding() || (dstType == TYP_INT && compOpportunisticallyDependsOn(InstructionSet_SSE41)))) #elif defined(TARGET_ARM) // Arm: src = float, dst = int64/uint64 or overflow conversion. && (tree->gtOverflow() || varTypeIsLong(dstType)) @@ -372,6 +373,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #else #if defined(TARGET_AMD64) // Following nodes are handled when lowering the nodes + // float -> ulong/uint/int/long for AVX10.2 + // double -> ulong/uint/int/long for AVX10.2 // float -> ulong/uint/int for AVX512F // double -> ulong/uint/long/int for AVX512F // float -> int for SSE41