
add pow
ShigekiKarita committed Jun 12, 2018
1 parent 9f6086a commit dc51c09
Showing 4 changed files with 206 additions and 6 deletions.
4 changes: 4 additions & 0 deletions kernel/kernel.d
@@ -78,5 +78,9 @@ nothrow @nogc extern(C++):
@kernel void tan(float* x, uint len, uint ndim, const uint* shape, const uint* strides);

@kernel void pow(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides);
@kernel void powGrad(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides);

@kernel void neg(float* x, uint len, uint ndim, const uint* shape, const uint* strides);

@kernel void abs(float* x, uint len, uint ndim, const uint* shape, const uint* strides);
@kernel void absGrad(float* x, uint len, uint ndim, const uint* shape, const uint* strides);
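// All of these kernels operate in place on x, addressing each element through
// (len, ndim, shape, strides), so strided (non-contiguous) tensors are handled too.
// A minimal launch sketch from the D side, mirroring Pow.forward later in this commit
// (CUDA build assumed; 2.0f is an arbitrary example power):
//
//     import grain.kernel : pow;
//     Global.kernel!pow
//         .call(2.0f, x.data.ptr, len, ndim, shape.ptr, strides.ptr)
//         .launch(len);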
26 changes: 26 additions & 0 deletions kernel/kernel_lib.cu
@@ -184,6 +184,15 @@ GRAIN_GLOBAL void pow(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    }
}

GRAIN_GLOBAL void powGrad(float power, float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    uint idx;
    GRAIN_PARALLEL_FOR(i, len) {
        idx = indexof(i, ndim, shape, strides);
        x[idx] = power * powf(x[idx], power - 1);
    }
}
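// powGrad overwrites x in place with the local derivative d/dx x^p = p * x^(p - 1);
// the incoming gradient gy is multiplied in afterwards on the D side (see Pow.backward
// below). For example, with power = 2 and x[idx] = 3, x[idx] becomes 2 * 3 = 6.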



GRAIN_GLOBAL void neg(float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    uint idx;
@@ -192,3 +201,20 @@ GRAIN_GLOBAL void neg(float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
        x[idx] = -x[idx];
    }
}


GRAIN_ND_EACH(abs, fabsf)

GRAIN_GLOBAL void absGrad(float* x, uint len, uint ndim, const uint* shape, const uint* strides) {
    uint idx;
    GRAIN_PARALLEL_FOR(i, len) {
        idx = indexof(i, ndim, shape, strides);
        if (x[idx] > 0) {
            x[idx] = 1.0f;
        } else if (x[idx] < 0) {
            x[idx] = -1.0f;
        }
        // x[idx] == 0 is left untouched, i.e. a zero gradient factor
    }
}
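// absGrad replaces x with its elementwise sign: +1 for positive entries, -1 for negative
// ones, and an untouched 0 for exact zeros, matching the host-side factor in Abs.backward.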
18 changes: 16 additions & 2 deletions source/grain/chain.d
@@ -123,17 +123,31 @@ auto tan(T, size_t dim, alias Storage)(Variable!(T, dim, Storage) x) {
    return func.applyForward(x);
}

/// abs
auto abs(T, size_t dim, alias Storage)(Variable!(T, dim, Storage) x) {
    import grain.functions.unary : Abs;
    auto func = new Abs!(T, dim);
    return func.applyForward(x);
}

/// pow
auto pow(T, size_t dim, alias Storage)(Variable!(T, dim, Storage) x, T power) {
    import grain.functions.unary : Pow;
    auto func = new Pow!(T, dim)(power);
    return func.applyForward(x);
}
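// Usage sketch (hypothetical values; mirrors the unittest below):
//     auto x = uniform!float(2, 3).slice.variable(true);
//     auto y = pow(x, 2.0f); // elementwise square, differentiable through the chain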


/// test fast math functions
unittest {
    import grain.testing;
    import numir;
    import mir.ndslice;
    import std.meta;
-    foreach (f; AliasSeq!(sigmoid, tanh, reciprocal, neg, exp, log, sin, cos, tan)) {
+    foreach (f; AliasSeq!(sigmoid, tanh, reciprocal, neg, exp, log, sin, cos, tan, x => pow(x, 2.0f))) {
        auto hx = uniform!float(2, 3).slice.variable(true);
        auto hgy = uniform!float(2, 3).slice.variable;
-        gradCheckChain!f(hx, hgy, 1e-3, 1e-2, 1e-2);
+        gradCheckChain!f(hx, hgy, 1e-3, 1e-2, 5e-2);
    }
}

164 changes: 160 additions & 4 deletions source/grain/functions/unary.d
@@ -4,7 +4,6 @@
TODO: support cudnn functions (see PDF manual in .deb for detail https://developer.nvidia.com/cudnn)
- activation (e.g., clipped-relu, elu), cudnnActivationForward/Backward
- (non-log) softmax, cudnnSoftmaxForward/Backward
- scale, cudnnScaleTensor
- sqrt not, cudnnOpTensor
- transform (e.g., contiguous or permute strides), cudnnTransformTensor
- reshape (i.e., view), ...???
@@ -579,7 +578,7 @@ unittest {
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 2).slice.variable;
    auto hgx = hfunc.backward(hgy);
-    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-3);
+    gradCheck(hfunc, hx, hgy, 1e-3, 1e-2, 1e-2);

    version (grain_cuda) {
        Reciprocal!(float, 2) dfunc;
@@ -704,7 +703,7 @@ unittest {
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 3).slice.variable;
    auto hgx = hfunc.backward(hgy);
-    gradCheck(hfunc, hx, hgy);
+    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-2);
    assert(approxEqual(hy.sliced, hx.sliced.map!log));

    version (grain_cuda) {
@@ -851,7 +850,7 @@ unittest {
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 3).slice.variable;
    auto hgx = hfunc.backward(hgy);
-    gradCheck(hfunc, hx, hgy);
+    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-3);
    assert(approxEqual(hy.sliced, hx.sliced.map!cos));

    version (grain_cuda) {
@@ -1059,3 +1058,160 @@ unittest {
        assert(approxEqual(dgx.to!HostStorage.sliced, hgx.sliced));
    }
}


/// y = abs x
struct Abs(T, size_t dim) {
    import mir.ndslice : slice, map;

    mixin FunctionCommon;
    Variable!(T, dim, HostStorage) hx;

    auto forward(Variable!(T, dim, HostStorage) x) {
        import mir.math : fabs;
        this.hx = x; // if train
        return slice(x.sliced.map!fabs).variable(x.requiresGrad);
    }

    auto backward(Variable!(T, dim, HostStorage) gy) {
        auto gx = gy.dup;
        gx.sliced[] *= this.hx.sliced.map!(a => a == 0f ? 0f : (a > 0f ? 1f : -1f));
        return gx;
    }

    version (grain_cuda) {
        Variable!(T, dim, DeviceStorage) dx;

        auto forward(Variable!(T, dim, DeviceStorage) x) {
            import grain.kernel : abs;
            auto y = x.dup;
            unaryFunc!abs(y);
            this.dx = x; // if train
            return y;
        }

        auto backward(Variable!(T, dim, DeviceStorage) gy) {
            import grain.kernel : absGrad;
            auto gx = this.dx.dup;
            unaryFunc!absGrad(gx);
            return gy * gx;
        }
    }
}
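// Backward summary: gx = gy * sign(x), with sign(0) taken as 0. The CUDA path obtains the
// same sign factor from the absGrad kernel and then multiplies by gy elementwise.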

/// test abs simple case, gradcheck and cpu/cuda equality
unittest {
    import grain.testing;
    import std.typecons;
    import numir;
    import mir.ndslice;

    auto xs = [[-1.0f, 2.0f, -3.0f], [1.0f, 0.0f, 0.0f]].nparray;
    auto ys = [[1.0f, 2.0f, 3.0f], [1.0f, 0.0f, 0.0f]].nparray;
    auto hfunc = Abs!(float, 2)();
    auto hx = xs.variable;
    auto hy = hfunc.forward(hx);
    assert(approxEqual(hy.sliced, ys));

    auto gxs = [[-0.1f, 0.2f, -0.3f], [0.5f, 0.0f, 0.0f]].nparray;
    auto gys = [[0.1f, 0.2f, 0.3f], [0.5f, 0.6f, 0.7f]].nparray;
    auto hgy = gys.variable;
    auto hgx = hfunc.backward(hgy);
    assert(approxEqual(hgx.sliced, gxs));

    version (grain_cuda) {
        auto dfunc = Abs!(float, 2)();
        auto dy = dfunc.forward(hx.to!DeviceStorage);
        assert(approxEqual(dy.to!HostStorage.sliced, hy.sliced));
        auto dgx = dfunc.backward(hgy.to!DeviceStorage);
        assert(approxEqual(dgx.to!HostStorage.sliced, hgx.sliced));
    }
}
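// Quick check of the hand-written expectations above: gx = gy * sign(x), so for instance
// gy = 0.1 with x = -1 gives -0.1, and gy = 0.6 with x = 0 gives 0.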


/// y = pow x
struct Pow(T, size_t dim) {
    import mir.ndslice : slice, map;

    mixin FunctionCommon;

    T power;
    Variable!(T, dim, HostStorage) hx;

    this(T power) {
        this.power = power;
    }

    auto forward(Variable!(T, dim, HostStorage) x) {
        import mir.math.common : pow;
        auto y = slice(x.sliced.map!(a => pow(a, this.power))).variable(x.requiresGrad);
        this.hx = x; // TODO if train
        return y;
    }

    auto backward(Variable!(T, dim, HostStorage) gy) {
        import mir.math.common : pow;
        auto gx = gy.dup;
        gx.sliced[] *= this.hx.sliced.map!(a => this.power * pow(a, this.power - 1));
        return gx;
    }

    version (grain_cuda) {
        Variable!(T, dim, DeviceStorage) dx;

        auto forward(Variable!(T, dim, DeviceStorage) _x) {
            this.dx = _x;
            auto x = _x.dup;
            import grain.kernel : pow;
            auto shape = CuPtr!uint(x.shape[0..$]);
            auto strides = CuPtr!int(x.strides[0..$]);
            auto ndim = cast(uint) dim;
            auto len = cast(uint) x.data.length;

            Global.kernel!pow
                .call(this.power, x.data.ptr, len, ndim, shape.ptr, strides.ptr)
                .launch(len);
            return x;
        }

        auto backward(Variable!(T, dim, DeviceStorage) gy) {
            auto x = this.dx.dup;
            import grain.kernel : powGrad;
            auto shape = CuPtr!uint(x.shape[0..$]);
            auto strides = CuPtr!int(x.strides[0..$]);
            auto ndim = cast(uint) dim;
            auto len = cast(uint) x.data.length;

            Global.kernel!powGrad
                .call(this.power, x.data.ptr, len, ndim, shape.ptr, strides.ptr)
                .launch(len);
            return gy * x;
        }
    }
}
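// Backward summary: gx = gy * power * x ^^ (power - 1). The CUDA path reuses the powGrad
// kernel on a copy of the saved input to form the power * x ^^ (power - 1) factor, then
// multiplies by gy elementwise.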

///
unittest {
    import grain.testing;
    import std.typecons;
    import numir;
    import mir.ndslice;
    import mir.math : pow;

    auto p = 2.0f;
    auto hfunc = Pow!(float, 2)(p);
    auto hx = uniform!float(2, 3).slice.variable;
    auto hy = hfunc.forward(hx);
    auto hgy = uniform!float(2, 3).slice.variable;
    auto hgx = hfunc.backward(hgy);
    gradCheck(hfunc, hx, hgy, 1e-3, 1e-3, 1e-3);
    assert(approxEqual(hy.sliced, hx.sliced.map!(a => pow(a, p))));

    version (grain_cuda) {
        auto dfunc = Pow!(float, 2)(p);
        auto dy = dfunc.forward(hx.to!DeviceStorage);
        assert(approxEqual(dy.to!HostStorage.sliced, hy.sliced));
        auto dgx = dfunc.backward(hgy.to!DeviceStorage);
        assert(approxEqual(dgx.to!HostStorage.sliced, hgx.sliced));
    }
}
