Merge pull request #31 from eschnett/eschnett/vifelse

Rename ifelse to vifelse
eschnett · Jun 25, 2018 · b61fb0b
2 parents 164d7b7 + 038ba4b
commit b61fb0b
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 39 deletions.
diff --git a/README.md b/README.md
@@ -39,7 +39,7 @@ The SIMD package provides the usual arithmetic and logical operations for SIMD v
 
 `+ - * / % ^ ! ~ & | $ << >> >>> == != < <= > >=`
 
-`abs cbrt ceil copysign cos div exp exp10 exp2 flipsign floor fma ifelse inv isfinite isinf isnan issubnormal log log10 log2 muladd rem round sign signbit sin sqrt trunc`
+`abs cbrt ceil copysign cos div exp exp10 exp2 flipsign floor fma inv isfinite isinf isnan issubnormal log log10 log2 muladd rem round sign signbit sin sqrt trunc vifelse`
 
 (Currently missing: `count_ones count_zeros exponent ldexp leading_ones leading_zeros significand trailing_ones trailing_zeros`, many trigonometric functions)
 

diff --git a/REQUIRE b/REQUIRE
@@ -1,2 +1,2 @@
 julia 0.6
-Compat 0.47.0
+Compat 0.52.0
diff --git a/src/SIMD.jl b/src/SIMD.jl
@@ -290,7 +290,7 @@ llvmins(::Type{Val{:(>=)}}, N, ::Type{T}) where {T <: UIntTypes} = "icmp uge"
 llvmins(::Type{Val{:(<)}}, N, ::Type{T}) where {T <: UIntTypes} = "icmp ult"
 llvmins(::Type{Val{:(<=)}}, N, ::Type{T}) where {T <: UIntTypes} = "icmp ule"
 
-llvmins(::Type{Val{:ifelse}}, N, ::Type{T}) where {T} = "select"
+llvmins(::Type{Val{:vifelse}}, N, ::Type{T}) where {T} = "select"
 
 llvmins(::Type{Val{:+}}, N, ::Type{T}) where {T <: FloatingTypes} = "fadd"
 llvmins(::Type{Val{:-}}, N, ::Type{T}) where {T <: FloatingTypes} = "fsub"
@@ -834,9 +834,9 @@ end
     ValNegOp = Val{NegOp}
     quote
         $(Expr(:meta, :inline))
-        ifelse(v2 >= 0,
-               llvmwrapshift($ValOp, v1, v2 % Vec{N,unsigned(U)}),
-               llvmwrapshift($ValNegOp, v1, -v2 % Vec{N,unsigned(U)}))
+        vifelse(v2 >= 0,
+                llvmwrapshift($ValOp, v1, v2 % Vec{N,unsigned(U)}),
+                llvmwrapshift($ValNegOp, v1, -v2 % Vec{N,unsigned(U)}))
     end
 end
 
@@ -870,7 +870,9 @@ end
     iv & sm != Vec{N,U}(0)
 end
 
-@generated function Base.ifelse(v1::Vec{N,Bool}, v2::Vec{N,T},
+export vifelse
+vifelse(c::Bool, x, y) = ifelse(c, x, y)
+@generated function vifelse(v1::Vec{N,Bool}, v2::Vec{N,T},
         v3::Vec{N,T}) where {N,T}
     btyp = llvmtype(Bool)
     vbtyp = "<$N x $btyp>"
@@ -922,10 +924,10 @@ end
 #       use a shift for v1<0
 #       evaluate v1>0 as -v1<0 ?
 @inline Base.sign(v1::Vec{N,T}) where {N,T<:IntTypes} =
-    ifelse(v1 == Vec{N,T}(0), Vec{N,T}(0),
-        ifelse(v1 < Vec{N,T}(0), Vec{N,T}(-1), Vec{N,T}(1)))
+    vifelse(v1 == Vec{N,T}(0), Vec{N,T}(0),
+        vifelse(v1 < Vec{N,T}(0), Vec{N,T}(-1), Vec{N,T}(1)))
 @inline Base.sign(v1::Vec{N,T}) where {N,T<:UIntTypes} =
-    ifelse(v1 == Vec{N,T}(0), Vec{N,T}(0), Vec{N,T}(1))
+    vifelse(v1 == Vec{N,T}(0), Vec{N,T}(0), Vec{N,T}(1))
 @inline Base.signbit(v1::Vec{N,T}) where {N,T<:IntTypes} = v1 < Vec{N,T}(0)
 @inline Base.signbit(v1::Vec{N,T}) where {N,T<:UIntTypes} = Vec{N,Bool}(false)
 
@@ -936,24 +938,24 @@ for op in (:&, :|, :⊻, :+, :-, :*, :div, :rem)
     end
 end
 @inline Base.copysign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:IntTypes} =
-    ifelse(signbit(v2), -abs(v1), abs(v1))
+    vifelse(signbit(v2), -abs(v1), abs(v1))
 @inline Base.copysign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:UIntTypes} = v1
 @inline Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:IntTypes} =
-    ifelse(signbit(v2), -v1, v1)
+    vifelse(signbit(v2), -v1, v1)
 @inline Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:UIntTypes} = v1
 @inline Base.max(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:IntegerTypes} =
-    ifelse(v1>=v2, v1, v2)
+    vifelse(v1>=v2, v1, v2)
 @inline Base.min(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:IntegerTypes} =
-    ifelse(v1>=v2, v2, v1)
+    vifelse(v1>=v2, v2, v1)
 
 @inline function Base.muladd(v1::Vec{N,T}, v2::Vec{N,T},
         v3::Vec{N,T}) where {N,T<:IntegerTypes}
     v1*v2+v3
 end
 
 # TODO: Handle negative shift counts
-#       use ifelse
-#       ensure ifelse is efficient
+#       use vifelse
+#       ensure vifelse is efficient
 for op in (:<<, :>>, :>>>)
     @eval begin
         @inline Base.$op(v1::Vec{N,T}, ::Type{Val{I}}) where {N,T<:IntegerTypes,I} =
@@ -988,7 +990,7 @@ for op in (
 end
 @inline Base.exp10(v1::Vec{N,T}) where {N,T<:FloatingTypes} = Vec{N,T}(10)^v1
 @inline Base.sign(v1::Vec{N,T}) where {N,T<:FloatingTypes} =
-    ifelse(v1 == Vec{N,T}(0.0), Vec{N,T}(0.0), copysign(Vec{N,T}(1.0), v1))
+    vifelse(v1 == Vec{N,T}(0.0), Vec{N,T}(0.0), copysign(Vec{N,T}(1.0), v1))
 
 for op in (:+, :-, :*, :/, :^, :copysign, :max, :min, :rem)
     @eval begin
@@ -999,7 +1001,7 @@ end
 @inline Base. ^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes} =
     llvmwrap(Val{:powi}, v1, Int(x2))
 @inline Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:FloatingTypes} =
-    ifelse(signbit(v2), -v1, v1)
+    vifelse(signbit(v2), -v1, v1)
 
 for op in (:fma, :muladd)
     @eval begin
@@ -1027,12 +1029,12 @@ for op in (
             $op(v1, Vec{N,T}(s2))
     end
 end
-@inline Base.ifelse(c::Vec{N,Bool}, s1::IntegerTypes,
+@inline vifelse(c::Vec{N,Bool}, s1::IntegerTypes,
         v2::Vec{N,T}) where {N,T<:IntegerTypes} =
-    ifelse(c, Vec{N,T}(s1), v2)
-@inline Base.ifelse(c::Vec{N,Bool}, v1::Vec{N,T},
+    vifelse(c, Vec{N,T}(s1), v2)
+@inline vifelse(c::Vec{N,Bool}, v1::Vec{N,T},
         s2::IntegerTypes) where {N,T<:IntegerTypes} =
-    ifelse(c, v1, Vec{N,T}(s2))
+    vifelse(c, v1, Vec{N,T}(s2))
 
 for op in (:muladd,)
     @eval begin
@@ -1070,12 +1072,12 @@ for op in (
             $op(v1, Vec{N,T}(s2))
     end
 end
-@inline Base.ifelse(c::Vec{N,Bool}, s1::ScalarTypes,
+@inline vifelse(c::Vec{N,Bool}, s1::ScalarTypes,
         v2::Vec{N,T}) where {N,T<:FloatingTypes} =
-    ifelse(c, Vec{N,T}(s1), v2)
-@inline Base.ifelse(c::Vec{N,Bool}, v1::Vec{N,T},
+    vifelse(c, Vec{N,T}(s1), v2)
+@inline vifelse(c::Vec{N,Bool}, v1::Vec{N,T},
         s2::ScalarTypes) where {N,T<:FloatingTypes} =
-    ifelse(c, v1, Vec{N,T}(s2))
+    vifelse(c, v1, Vec{N,T}(s2))
 
 for op in (:fma, :muladd)
     @eval begin
@@ -1205,8 +1207,11 @@ export valloc
 function valloc(::Type{T}, N::Int, sz::Int) where T
     @assert N > 0
     @assert sz >= 0
-    padding = N-1
-    mem = Vector{T}(uninitialized, sz + padding)
+    # We use padding to align the address of the first element, and
+    # also to ensure that we can access past the last element up to
+    # the next full vector width
+    padding = N-1 + mod(-sz, N)
+    mem = Vector{T}(undef, sz + padding)
     addr = Int(pointer(mem))
     off = mod(-addr, N * sizeof(T))
     @assert mod(off, sizeof(T)) == 0

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,7 @@
 using SIMD
 using Compat.Test
 using Compat: @info
+using Compat.InteractiveUtils
 
 @info "Basic definitions"
 
@@ -98,8 +99,8 @@ for op in (
     @test Tuple(op(V8I32(v8i32), V8I32(v8i32b))) === map(op, v8i32, v8i32b)
 end
 
-ifelsebool(x,y,z) = ifelse(x>=typeof(x)(0),y,z)
-for op in (ifelsebool, muladd)
+vifelsebool(x,y,z) = vifelse(x>=typeof(x)(0),y,z)
+for op in (vifelsebool, muladd)
     @test Tuple(op(V8I32(v8i32), V8I32(v8i32b), V8I32(v8i32c))) ===
         map(op, v8i32, v8i32b, v8i32c)
 end
@@ -195,7 +196,7 @@ for op in (
     @test Tuple(op(V4F64(v4f64), V4F64(v4f64b))) === map(op, v4f64, v4f64b)
 end
 
-for op in (fma, ifelsebool, muladd)
+for op in (fma, vifelsebool, muladd)
     @test Tuple(op(V4F64(v4f64), V4F64(v4f64b), V4F64(v4f64c))) ===
         map(op, v4f64, v4f64b, v4f64c)
 end
@@ -208,10 +209,10 @@ for op in (
     @test op(42, V8I32(v8i32)) === op(V8I32(42), V8I32(v8i32))
     @test op(V8I32(v8i32), 42) === op(V8I32(v8i32), V8I32(42))
 end
-@test ifelse(signbit(V8I32(v8i32)), 42, V8I32(v8i32)) ===
-    ifelse(signbit(V8I32(v8i32)), V8I32(42), V8I32(v8i32))
-@test ifelse(signbit(V8I32(v8i32)), V8I32(v8i32), 42) ===
-    ifelse(signbit(V8I32(v8i32)), V8I32(v8i32), V8I32(42))
+@test vifelse(signbit(V8I32(v8i32)), 42, V8I32(v8i32)) ===
+    vifelse(signbit(V8I32(v8i32)), V8I32(42), V8I32(v8i32))
+@test vifelse(signbit(V8I32(v8i32)), V8I32(v8i32), 42) ===
+    vifelse(signbit(V8I32(v8i32)), V8I32(v8i32), V8I32(42))
 for op in (muladd,)
     @test op(42, 42, V8I32(v8i32)) ===
         op(V8I32(42), V8I32(42), V8I32(v8i32))
@@ -233,10 +234,10 @@ for op in (
     @test op(42, V4F64(v4f64)) === op(V4F64(42), V4F64(v4f64))
     @test op(V4F64(v4f64), 42) === op(V4F64(v4f64), V4F64(42))
 end
-@test ifelse(signbit(V4F64(v4f64)), 42, V4F64(v4f64)) ===
-    ifelse(signbit(V4F64(v4f64)), V4F64(42), V4F64(v4f64))
-@test ifelse(signbit(V4F64(v4f64)), V4F64(v4f64), 42) ===
-    ifelse(signbit(V4F64(v4f64)), V4F64(v4f64), V4F64(42))
+@test vifelse(signbit(V4F64(v4f64)), 42, V4F64(v4f64)) ===
+    vifelse(signbit(V4F64(v4f64)), V4F64(42), V4F64(v4f64))
+@test vifelse(signbit(V4F64(v4f64)), V4F64(v4f64), 42) ===
+    vifelse(signbit(V4F64(v4f64)), V4F64(v4f64), V4F64(42))
 for op in (fma, muladd)
     @test op(42, 42, V4F64(v4f64)) ===
         op(V4F64(42), V4F64(42), V4F64(v4f64))