Skip to content

Commit

Permalink
[Hardware][CPU][bugfix] Fix half dtype support on AVX2-only target (vllm-project#10108)
Browse files Browse the repository at this point in the history

Signed-off-by: jiang1.li <[email protected]>
Signed-off-by: Sumit Dubey <[email protected]>
  • Loading branch information
bigPYJ1151 authored and sumitd2 committed Nov 14, 2024
1 parent 83d962a commit 109491d
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
2 changes: 1 addition & 1 deletion cmake/cpu_extension.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
# Declares the oneDNN dependency, fetched from its upstream GitHub repository
# as a shallow clone with progress output enabled.
# NOTE(review): this is a diff rendering ("1 addition & 1 deletion"), so the two
# GIT_TAG lines below are the old and new versions of a single line --
# presumably v3.5.3 is the removed tag and v3.6 the added one.
FetchContent_Declare(
oneDNN
GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
GIT_TAG v3.5.3
GIT_TAG v3.6
GIT_PROGRESS TRUE
GIT_SHALLOW TRUE
)
Expand Down
10 changes: 10 additions & 0 deletions csrc/cpu/cpu_types_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,16 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
// Builds the vector from a single FP32Vec8 by copying data.reg into both
// reg_low and reg_high, so the input is duplicated across the two halves.
explicit FP32Vec16(const FP32Vec8 &data)
: reg_low(data.reg), reg_high(data.reg) {}

explicit FP32Vec16(const FP16Vec16 &v) {
__m128i low = _mm256_extractf128_si256(v.reg, 0);
__m128i high = _mm256_extractf128_si256(v.reg, 1);

reg_low = _mm256_cvtph_ps(low);
reg_high = _mm256_cvtph_ps(high);
}

// Delegates to the FP32Vec16(const FP32Vec8 &) constructor: v is first turned
// into an FP32Vec8 (presumably an fp16 -> fp32 widening; that constructor is
// not shown here), and the result is then copied into both halves.
explicit FP32Vec16(const FP16Vec8 &v) : FP32Vec16(FP32Vec8(v)) {}

explicit FP32Vec16(const BF16Vec16 &v) {
__m128i low = _mm256_extractf128_si256(v.reg, 0);
__m128i high = _mm256_extractf128_si256(v.reg, 1);
Expand Down

0 comments on commit 109491d

Please sign in to comment.