
add pow
ShigekiKarita committed Jun 12, 2018
1 parent 9f6086a commit dc51c09
Showing 4 changed files with 206 additions and 6 deletions.
4 changes: 4 additions & 0 deletions kernel/kernel.d
@@ -78,5 +78,9 @@ nothrow @nogc extern(C++):
@kernel void tan(float* x, uint len, uint ndim, const uint* shape, const uint* strides);

@kernel void pow(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides);
@kernel void powGrad(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides);

@kernel void neg(float* x, uint len, uint ndim, const uint* shape, const uint* strides);

@kernel void abs(float* x, uint len, uint ndim, const uint* shape, const uint* strides);
@kernel void absGrad(float* x, uint len, uint ndim, const uint* shape, const uint* strides);
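// All of these kernels operate in place on x, addressing each element through
// (len, ndim, shape, strides), so strided (non-contiguous) tensors are handled too.
// A minimal launch sketch from the D side, mirroring Pow.forward later in this commit
// (CUDA build assumed; 2.0f is an arbitrary example power):
//
//     import grain.kernel : pow;
//     Global.kernel!pow
//         .call(2.0f, x.data.ptr, len, ndim, shape.ptr, strides.ptr)
//         .launch(len);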
26 changes: 26 additions & 0 deletions kernel/kernel_lib.cu
@@ -184,6 +184,15 @@ GRAIN_GLOBAL void pow(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    }
}

GRAIN_GLOBAL void powGrad(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    uint idx;
    GRAIN_PARALLEL_FOR(i, len) {
        idx = indexof(i, ndim, shape, strides);
        x[idx] = power * powf(x[idx], power - 1);
    }
}
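// powGrad overwrites x in place with the local derivative d/dx x^p = p * x^(p - 1);
// the incoming gradient gy is multiplied in afterwards on the D side (see Pow.backward
// below). For example, with power = 2 and x[idx] = 3, x[idx] becomes 2 * 3 = 6.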



GRAIN_GLOBAL void neg(float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    uint idx;
@@ -192,3 +201,20 @@ GRAIN_GLOBAL void neg(float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
        x[idx] = -x[idx];
    }
}


GRAIN_ND_EACH(abs, fabsf)

GRAIN_GLOBAL void absGrad(float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    uint idx;
    GRAIN_PARALLEL_FOR(i, len) {
        idx = indexof(i, ndim, shape, strides);
        if (x[idx] > 0) {
            x[idx] = 1.0f;
        } else if (x[idx] < 0) {
            x[idx] = -1.0f;
        }
        // x[idx] == 0 is left untouched, i.e. a zero gradient factor
    }
}
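// absGrad replaces x with its elementwise sign: +1 for positive entries, -1 for negative
// ones, and an untouched 0 for exact zeros, matching the host-side factor in Abs.backward.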
18 changes: 16 additions & 2 deletions source/grain/chain.d
@@ -123,17 +123,31 @@ auto tan(T, size_t dim, alias Storage)(Variable!(T, dim, Storage) x) {
    return func.applyForward(x);
}

/// abs
auto abs(T, size_t dim, alias Storage)(Variable!(T, dim, Storage) x) {
    import grain.functions.unary : Abs;
    auto func = new Abs!(T, dim);
    return func.applyForward(x);
}

/// pow
auto pow(T, size_t dim, alias Storage)(Variable!(T, dim, Storage) x, T power) {
    import grain.functions.unary : Pow;
    auto func = new Pow!(T, dim)(power);
    return func.applyForward(x);
}
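// Usage sketch (hypothetical values; mirrors the unittest below):
//     auto x = uniform!float(2, 3).slice.variable(true);
//     auto y = pow(x, 2.0f); // elementwise square, differentiable through the chain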


/// test fast math functions
unittest {
    import grain.testing;
    import numir;
    import mir.ndslice;
    import std.meta;
-    foreach (f; AliasSeq!(sigmoid, tanh, reciprocal, neg, exp, log, sin, cos, tan)) {
+    foreach (f; AliasSeq!(sigmoid, tanh, reciprocal, neg, exp, log, sin, cos, tan, x => pow(x, 2.0f))) {
        auto hx = uniform!float(2, 3).slice.variable(true);
        auto hgy = uniform!float(2, 3).slice.variable;
-        gradCheckChain!f(hx, hgy, 1e-3, 1e-2, 1e-2);
+        gradCheckChain!f(hx, hgy, 1e-3, 1e-2, 5e-2);
    }
}

164 changes: 160 additions & 4 deletions source/grain/functions/unary.d
@@ -4,7 +4,6 @@
TODO: support cudnn functions (see PDF manual in .deb for detail https://developer.nvidia.com/cudnn)
- activation (e.g., clipped-relu, elu), cudnnActivationForward/Backward
- (non-log) softmax, cudnnSoftmaxForward/Backward
- scale, cudnnScaleTensor
- sqrt not, cudnnOpTensor
- transform (e.g., contiguous or permute strides), cudnnTransformTensor
- reshape (i.e., view), ...???
@@ -579,7 +578,7 @@ unittest {
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 2).slice.variable;
    auto hgx = hfunc.backward(hgy);
-    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-3);
+    gradCheck(hfunc, hx, hgy, 1e-3, 1e-2, 1e-2);

    version (grain_cuda) {
        Reciprocal!(float, 2) dfunc;
@@ -704,7 +703,7 @@ unittest {
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 3).slice.variable;
    auto hgx = hfunc.backward(hgy);
-    gradCheck(hfunc, hx, hgy);
+    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-2);
    assert(approxEqual(hy.sliced, hx.sliced.map!log));

    version (grain_cuda) {
@@ -851,7 +850,7 @@ unittest {
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 3).slice.variable;
    auto hgx = hfunc.backward(hgy);
-    gradCheck(hfunc, hx, hgy);
+    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-3);
    assert(approxEqual(hy.sliced, hx.sliced.map!cos));

    version (grain_cuda) {
@@ -1059,3 +1058,160 @@ unittest {
        assert(approxEqual(dgx.to!HostStorage.sliced, hgx.sliced));
    }
}


/// y = abs x
struct Abs(T, size_t dim) {
    import mir.ndslice : slice, map;

    mixin FunctionCommon;
    Variable!(T, dim, HostStorage) hx;

    auto forward(Variable!(T, dim, HostStorage) x) {
        import mir.math : fabs;
        this.hx = x; // if train
        return slice(x.sliced.map!fabs).variable(x.requiresGrad);
    }

    auto backward(Variable!(T, dim, HostStorage) gy) {
        auto gx = gy.dup;
        gx.sliced[] *= this.hx.sliced.map!(a => a == 0f ? 0f : (a > 0f ? 1f : -1f));
        return gx;
    }

    version (grain_cuda) {
        Variable!(T, dim, DeviceStorage) dx;

        auto forward(Variable!(T, dim, DeviceStorage) x) {
            import grain.kernel : abs;
            auto y = x.dup;
            unaryFunc!abs(y);
            this.dx = x; // if train
            return y;
        }

        auto backward(Variable!(T, dim, DeviceStorage) gy) {
            import grain.kernel : absGrad;
            auto gx = this.dx.dup;
            unaryFunc!absGrad(gx);
            return gy * gx;
        }
    }
}
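// Backward summary: gx = gy * sign(x), with sign(0) taken as 0. The CUDA path obtains the
// same sign factor from the absGrad kernel and then multiplies by gy elementwise.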

/// test abs simple case, gradcheck and cpu/cuda equality
unittest {
    import grain.testing;
    import std.typecons;
    import numir;
    import mir.ndslice;

    auto xs = [[-1.0f, 2.0f, -3.0f], [1.0f, 0.0f, 0.0f]].nparray;
    auto ys = [[1.0f, 2.0f, 3.0f], [1.0f, 0.0f, 0.0f]].nparray;
    auto hfunc = Abs!(float, 2)();
    auto hx = xs.variable;
    auto hy = hfunc.forward(hx);
    assert(approxEqual(hy.sliced, ys));

    auto gxs = [[-0.1f, 0.2f, -0.3f], [0.5f, 0.0f, 0.0f]].nparray;
    auto gys = [[0.1f, 0.2f, 0.3f], [0.5f, 0.6f, 0.7f]].nparray;
    auto hgy = gys.variable;
    auto hgx = hfunc.backward(hgy);
    assert(approxEqual(hgx.sliced, gxs));

    version (grain_cuda) {
        auto dfunc = Abs!(float, 2)();
        auto dy = dfunc.forward(hx.to!DeviceStorage);
        assert(approxEqual(dy.to!HostStorage.sliced, hy.sliced));
        auto dgx = dfunc.backward(hgy.to!DeviceStorage);
        assert(approxEqual(dgx.to!HostStorage.sliced, hgx.sliced));
    }
}
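// Quick check of the hand-written expectations above: gx = gy * sign(x), so for instance
// gy = 0.1 with x = -1 gives -0.1, and gy = 0.6 with x = 0 gives 0.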


/// y = pow x
struct Pow(T, size_t dim) {
    import mir.ndslice : slice, map;

    mixin FunctionCommon;

    T power;
    Variable!(T, dim, HostStorage) hx;

    this(T power) {
        this.power = power;
    }

    auto forward(Variable!(T, dim, HostStorage) x) {
        import mir.math.common : pow;
        auto y = slice(x.sliced.map!(a => pow(a, this.power))).variable(x.requiresGrad);
        this.hx = x; // TODO if train
        return y;
    }

    auto backward(Variable!(T, dim, HostStorage) gy) {
        import mir.math.common : pow;
        auto gx = gy.dup;
        gx.sliced[] *= this.hx.sliced.map!(a => this.power * pow(a, this.power - 1));
        return gx;
    }

    version (grain_cuda) {
        Variable!(T, dim, DeviceStorage) dx;

        auto forward(Variable!(T, dim, DeviceStorage) _x) {
            this.dx = _x;
            auto x = _x.dup;
            import grain.kernel : pow;
            auto shape = CuPtr!uint(x.shape[0..$]);
            auto strides = CuPtr!int(x.strides[0..$]);
            auto ndim = cast(uint) dim;
            auto len = cast(uint) x.data.length;

            Global.kernel!pow
                .call(this.power, x.data.ptr, len, ndim, shape.ptr, strides.ptr)
                .launch(len);
            return x;
        }

        auto backward(Variable!(T, dim, DeviceStorage) gy) {
            auto x = this.dx.dup;
            import grain.kernel : powGrad;
            auto shape = CuPtr!uint(x.shape[0..$]);
            auto strides = CuPtr!int(x.strides[0..$]);
            auto ndim = cast(uint) dim;
            auto len = cast(uint) x.data.length;

            Global.kernel!powGrad
                .call(this.power, x.data.ptr, len, ndim, shape.ptr, strides.ptr)
                .launch(len);
            return gy * x;
        }
    }
}
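// Backward summary: gx = gy * power * x ^^ (power - 1). The CUDA path reuses the powGrad
// kernel on a copy of the saved input to form the power * x ^^ (power - 1) factor, then
// multiplies by gy elementwise.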

///
unittest {
    import grain.testing;
    import std.typecons;
    import numir;
    import mir.ndslice;
    import mir.math : pow;

    auto p = 2.0f;
    auto hfunc = Pow!(float, 2)(p);
    auto hx = uniform!float(2, 3).slice.variable;
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 3).slice.variable;
    auto hgx = hfunc.backward(hgy);
    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-3);
    assert(approxEqual(hy.sliced, hx.sliced.map!(a => pow(a, p))));

    version (grain_cuda) {
        auto dfunc = Pow!(float, 2)(p);
        auto dy = dfunc.forward(hx.to!DeviceStorage);
        assert(approxEqual(dy.to!HostStorage.sliced, hy.sliced));
        auto dgx = dfunc.backward(hgy.to!DeviceStorage);
        assert(approxEqual(dgx.to!HostStorage.sliced, hgx.sliced));
    }
}
