Skip to content

Commit

Permalink
Use __builtin_shufflevector for the generic implementation of shuffle
Browse files Browse the repository at this point in the history
Let the compiler do the heavy lifting when possible :-)
Fix #926
  • Loading branch information
serge-sans-paille committed Oct 18, 2023
1 parent e036691 commit cc14c23
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions include/xsimd/arch/generic/xsimd_generic_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,29 @@ namespace xsimd
return select(batch_bool_constant<batch<T, A>, (Indices < bsize)...>(), x, y);
}

#if defined(__has_builtin)
#if __has_builtin(__builtin_shuffle_vector)
#define builtin_shuffle __builtin_shuffle_vector
#endif
#endif

#if defined(builtin_shuffle)
return builtin_shuffle(x.data, y.data, Indices...);

// FIXME: my experiments show that GCC only correctly optimizes this builtin
// starting at GCC 13, where it already has __builtin_shuffle_vector
//
// #elif __has_builtin(__builtin_shuffle) || GCC >= 6
// typedef ITy integer_vector_type __attribute__((vector_size(sizeof(batch<ITy, A>))));
// return __builtin_shuffle(x.data, y.data, integer_vector_type{Indices...});
#else
// Use a generic_pattern. It is suboptimal but clang optimizes this
// pretty well.
batch<T, A> x_lane = swizzle(x, batch_constant<batch<ITy, A>, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
batch<T, A> y_lane = swizzle(y, batch_constant<batch<ITy, A>, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
batch_bool_constant<batch<T, A>, (Indices < bsize)...> select_x_lane;
return select(select_x_lane, x_lane, y_lane);
#endif
}

// store
Expand Down

0 comments on commit cc14c23

Please sign in to comment.