Skip to content

Commit

Permalink
[NVIDIA] Change IsCudaGraphCompatible() interface to GetCudaGraphCompatibility() using enum
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrii Pavliuk committed Nov 15, 2023
1 parent e68fc18 commit 9a8412c
Show file tree
Hide file tree
Showing 109 changed files with 167 additions and 143 deletions.
30 changes: 17 additions & 13 deletions modules/nvidia_plugin/src/cuda_graph_topology_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ CudaGraphTopologyRunner::CudaGraphTopologyRunner(const CreationContext& context,
const auto totalSize = origSequence.size();
OPENVINO_ASSERT(totalSize != 0, "ExecSequence size is 0");

bool isLastOpCompatible = origSequence[0]->IsCudaGraphCompatible();
CudaGraphCompatibility lastOpCompatibility = origSequence[0]->GetCudaGraphCompatibility();
currentSequence.push_back(origSequence[0]);
for (size_t i = 1; i < totalSize; ++i) {
const auto& op = origSequence[i];
if (std::dynamic_pointer_cast<const TensorIteratorOp>(op) || op->IsCudaGraphCompatible() != isLastOpCompatible) {
isLastOpCompatible = !isLastOpCompatible;
if (auto c = op->GetCudaGraphCompatibility(); c != lastOpCompatibility) {
lastOpCompatibility = c;
sequences.emplace_back(std::move(currentSequence));
currentSequence.clear();
}
Expand All @@ -51,7 +51,7 @@ CudaGraphTopologyRunner::CudaGraphTopologyRunner(const CreationContext& context,
const auto& memoryManager = orig_subgraph_.memoryManager();
for (auto&& sequence : sequences) {
subgraphs_.emplace_back(context, model, std::move(sequence), memoryManager);
if (subgraphs_[subgraphs_.size() - 1].IsCudaGraphCompatible()) {
if (subgraphs_.back().GetCudaGraphCompatibility() != CudaGraphCompatibility::NONE) {
++cuda_graphs_count_;
}
}
Expand All @@ -62,7 +62,14 @@ void CudaGraphTopologyRunner::Run(InferenceRequestContext& context, const Device
auto& graphContext = context.getCudaGraphContext();
std::size_t graphIndex = 0;
for (auto& subgraph : subgraphs_) {
if (auto ti = getTI(subgraph)) {
auto compatibility = subgraph.GetCudaGraphCompatibility();
if (compatibility == CudaGraphCompatibility::FULL) {
graphContext.select_current_graph(graphIndex);
graphContext.get_current_graph_info().launch(stream);
graphIndex++;
} else if (compatibility == CudaGraphCompatibility::SPECIAL) {
// TODO: remove
auto ti = getTI(subgraph);
CUDA::DevicePointer<void*> mutableBuffer{memoryBlock.view().data()};
const auto& memoryManager = *subgraph.memoryManager();
const auto& inputTensors = memoryManager.inputTensorPointers(*ti, mutableBuffer);
Expand All @@ -71,10 +78,6 @@ void CudaGraphTopologyRunner::Run(InferenceRequestContext& context, const Device
graphContext.select_current_graph(graphIndex);
ti->ExecuteGraph(context, inputTensors, outputTensors, workBuffers);
graphIndex++;
} else if (subgraph.IsCudaGraphCompatible()) {
graphContext.select_current_graph(graphIndex);
graphContext.get_current_graph_info().launch(stream);
graphIndex++;
} else {
Workbuffers workbuffers{};
workbuffers.mutable_buffers.emplace_back(memoryBlock.view().data());
Expand All @@ -92,10 +95,8 @@ void CudaGraphTopologyRunner::Capture(InferenceRequestContext& context,
for (const auto& subgraph : subgraphs_) {
Workbuffers workbuffers{};
workbuffers.mutable_buffers.emplace_back(memoryBlock.view().data());
if (getTI(subgraph)) {
graphContext.add_new_graph_info();
subgraph.Capture(context, {}, {}, workbuffers);
} else if (subgraph.IsCudaGraphCompatible()) {
auto compatibility = subgraph.GetCudaGraphCompatibility();
if (compatibility == CudaGraphCompatibility::FULL) {
graphContext.add_new_graph_info();
CUDA::GraphCapture capture{stream};
{
Expand All @@ -104,6 +105,9 @@ void CudaGraphTopologyRunner::Capture(InferenceRequestContext& context,
}
const auto& graph = capture.getGraph();
graphContext.set_current_graph(graph);
} else if (compatibility == CudaGraphCompatibility::SPECIAL) {
graphContext.add_new_graph_info();
subgraph.Capture(context, {}, {}, workbuffers);
}
}
}
Expand Down
6 changes: 4 additions & 2 deletions modules/nvidia_plugin/src/cuda_operation_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ namespace nvidia_gpu {
template <typename T>
using DevicePointer = CUDA::DevicePointer<T>;

enum class CudaGraphCompatibility { NONE, FULL, SPECIAL };

class IOperationExec {
public:
using Inputs = gsl::span<const CUDA::DevicePointer<const void*>>;
Expand All @@ -46,7 +48,7 @@ class IOperationExec {
Inputs inputTensors,
Outputs outputTensors,
const Workbuffers& workbuffers) const = 0;
virtual bool IsCudaGraphCompatible() const = 0;
virtual CudaGraphCompatibility GetCudaGraphCompatibility() const = 0;
virtual void InitSharedImmutableWorkbuffers(const Buffers&) = 0;
virtual WorkbufferRequest GetWorkBufferRequest() const = 0;
virtual const WorkbufferIds& GetWorkbufferIds() const = 0;
Expand Down Expand Up @@ -81,7 +83,7 @@ class OperationBase : public IOperationExec, public IOperationMeta, public std::
IndexCollection&& inputIds,
IndexCollection&& outputIds);

bool IsCudaGraphCompatible() const override { return false; }
CudaGraphCompatibility GetCudaGraphCompatibility() const override { return CudaGraphCompatibility::NONE; }

WorkbufferRequest GetWorkBufferRequest() const override {
return {}; // Most operators do not need workbuffers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ void ActivationForwardCuDnnOpBase::Execute(const InferenceRequestContext& contex
outputTensors[0].get());
}

bool ActivationForwardCuDnnOpBase::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility ActivationForwardCuDnnOpBase::GetCudaGraphCompatibility() const {
return CudaGraphCompatibility::FULL;
}

} // namespace nvidia_gpu
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class ActivationForwardCuDnnOpBase : public OperationCuDnn {
Outputs outputTensors,
const Workbuffers&) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

protected:
std::unique_ptr<CUDA::DnnActivationDescriptor> op_desc_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/avgpool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void AvgPoolOp::Execute(const InferenceRequestContext& context,
outputs[PoolingImpl::output_index].get());
}

bool AvgPoolOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility AvgPoolOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

OPERATION_REGISTER(AvgPoolOp, AvgPool);

Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/avgpool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class AvgPoolOp : public OperationCuDnn {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
PoolingImpl impl_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/broadcast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void BroadcastOp::Execute(const InferenceRequestContext& context,
(*kernel_)(stream, inputs[0].get(), broadcast_params_->mapper(workbuffers.immutable_buffers), outputs[0].get());
}

bool BroadcastOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility BroadcastOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

WorkbufferRequest BroadcastOp::GetWorkBufferRequest() const { return {immutable_buffer_sizes_, {}}; }

Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/broadcast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class BroadcastOp : public OperationBase {
WorkbufferRequest GetWorkBufferRequest() const override;
void InitSharedImmutableWorkbuffers(const Buffers& buffers) override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
std::vector<WorkbufferRequest::size_in_bytes_t> immutable_buffer_sizes_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/clamp_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void ClampCudaOp::Execute(const InferenceRequestContext& context,
(*kernel_)(context.getThreadContext().stream().get(), inputTensors[0].get(), outputTensors[0].get());
}

bool ClampCudaOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility ClampCudaOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

} // namespace nvidia_gpu
} // namespace ov
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/clamp_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class ClampCudaOp : public OperationBase {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
std::optional<kernel::Clamp> kernel_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/clamp_cudnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void ClampCuDnnOp::Execute(const InferenceRequestContext& context,
outputTensors[0].get());
}

bool ClampCuDnnOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility ClampCuDnnOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

void ClampCuDnnOp::InitSharedImmutableWorkbuffers(const Buffers& buffers) {
switch (data_type_) {
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/clamp_cudnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ClampCuDnnOp : public OperationCuDnn {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;
void InitSharedImmutableWorkbuffers(const Buffers& buffers) override;
WorkbufferRequest GetWorkBufferRequest() const override;

Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/comparison.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Comparison::Comparison(const CreationContext& context,
threads_per_block};
}

bool Comparison::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility Comparison::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

void Comparison::Execute(const InferenceRequestContext& context,
Inputs inputs,
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/comparison.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Comparison : public OperationBase {
IndexCollection&& outputIds,
kernel::Comparison::Op_t operation_type);

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
void calculateOffsets();
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/concat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ void ConcatOp::Execute(const InferenceRequestContext& context,
outputs[0].get());
}

bool ConcatOp::IsCudaGraphCompatible() const { return false; }
CudaGraphCompatibility ConcatOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::NONE; }

OPERATION_REGISTER(ConcatOp, Concat);
} // namespace nvidia_gpu
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/concat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class ConcatOp : public OperationBase {
WorkbufferRequest GetWorkBufferRequest() const override;
void InitSharedImmutableWorkbuffers(const Buffers&) override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
size_t immutableWbSize() const { return concat_kernel_.value().immutableWbSize(); }
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void ConvertOp::Execute(const InferenceRequestContext& context,
(*convert_kernel_)(stream.get(), outputs[0].get(), inputs[0].get());
}

bool ConvertOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility ConvertOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

OPERATION_REGISTER(ConvertOp, Convert);

Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ConvertOp : public OperationBase {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

using Type_t = ov::element::Type_t;
using convert_t = void (*)(
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convert_color_i420.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class I420ConvertColorBase : public OperationBase {
}
}

bool IsCudaGraphCompatible() const override { return true; }
CudaGraphCompatibility GetCudaGraphCompatibility() const override { return CudaGraphCompatibility::FULL; }

private:
std::optional<TKernel> kernel_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convert_color_nv12.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class NV12ConvertColorBase : public OperationBase {
}
}

bool IsCudaGraphCompatible() const override { return true; }
CudaGraphCompatibility GetCudaGraphCompatibility() const override { return CudaGraphCompatibility::FULL; }

private:
std::optional<TKernel> kernel_;
Expand Down
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/src/ops/convolution_backprop_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ void ConvBackpropDataOp<T>::Execute(const InferenceRequestContext& context,
}

template <typename T>
bool ConvBackpropDataOp<T>::IsCudaGraphCompatible() const {
return true;
CudaGraphCompatibility ConvBackpropDataOp<T>::GetCudaGraphCompatibility() const {
return CudaGraphCompatibility::FULL;
}

OPERATION_REGISTER(ConvolutionBackpropDataOp, ConvolutionBackpropData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ConvBackpropDataOp : public OperationCuDnn {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

void InitSharedImmutableWorkbuffers(const IOperationExec::Buffers&) override;
WorkbufferRequest GetWorkBufferRequest() const override;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convolution_cudnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void ConvolutionCuDnn::Execute(const InferenceRequestContext& context,
throwIfError(status);
}

bool ConvolutionCuDnn::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility ConvolutionCuDnn::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

WorkbufferRequest ConvolutionCuDnn::GetWorkBufferRequest() const {
if (descs_.Algo().memory != 0)
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convolution_cudnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ConvolutionCuDnn : public OperationCuDnn {

WorkbufferRequest GetWorkBufferRequest() const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
Convolution::Details::ConvolutionDescriptorsCuDnn descs_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convolution_cudnn_be.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ void ConvolutionCuDnnBE::Execute(const InferenceRequestContext& context,
throwIfError(::cudnnBackendExecute(context.getThreadContext().dnnHandle().get(), plan->get(), variantPack->get()));
}

bool ConvolutionCuDnnBE::IsCudaGraphCompatible() const { return false; }
CudaGraphCompatibility ConvolutionCuDnnBE::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::NONE; }

std::shared_ptr<CUDA::DnnBETensorDescriptor> ConvolutionCuDnnBE::MakeTensorDescriptor(int64_t id,
cudnnDataType_t element_type,
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/convolution_cudnn_be.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ConvolutionCuDnnBE : public OperationCuDnn {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

WorkbufferRequest GetWorkBufferRequest() const override;

Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/cudnn_tensor_op_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ void CuDnnTensorOpBase::Execute(const InferenceRequestContext& context,
outputTensors[0].get());
}

bool CuDnnTensorOpBase::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility CuDnnTensorOpBase::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

CuDnnTensorOpBase::IoParams::IoParams(const ov::Node& node, const Type& io_type, int index)
: type_(convertDataType<cudnnDataType_t>(io_type == Type::INPUT ? node.get_input_element_type(index)
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/cudnn_tensor_op_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class CuDnnTensorOpBase : public OperationCuDnn {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
struct IoParams {
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/detection_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void DetectionOutputOp::Execute(const InferenceRequestContext& context,
}
}

bool DetectionOutputOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility DetectionOutputOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

void DetectionOutputOp::InitSharedImmutableWorkbuffers(const Buffers& buffers) {
kernel_.value().initSharedImmutableWorkbuffers(buffers);
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/detection_output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class DetectionOutputOp : public OperationBase {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

void InitSharedImmutableWorkbuffers(const Buffers& buffers) override;
WorkbufferRequest GetWorkBufferRequest() const override;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/elementwise_binary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class ElementwiseBinaryOp : public OperationBase {
static_cast<void*>(outputTensors[0].get()));
}

bool IsCudaGraphCompatible() const override { return true; }
CudaGraphCompatibility GetCudaGraphCompatibility() const override { return CudaGraphCompatibility::FULL; }

void InitSharedImmutableWorkbuffers(const IOperationExec::Buffers& buffers) override {
in0_broadcast_params_->initWorkbuffers(buffers);
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/elementwise_unary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class ElementwiseUnaryOp : public OperationBase {
(*kernel_)(stream.get(), inputTensors[0].get(), outputTensors[0].get());
}

bool IsCudaGraphCompatible() const override { return true; }
CudaGraphCompatibility GetCudaGraphCompatibility() const override { return CudaGraphCompatibility::FULL; }

private:
std::optional<Kernel> kernel_;
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/elu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void EluOp::Execute(const InferenceRequestContext& context,
(*kernel_)(stream.get(), inputTensors[0].get(), outputTensors[0].get());
}

bool EluOp::IsCudaGraphCompatible() const { return true; }
CudaGraphCompatibility EluOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

OPERATION_REGISTER(EluOp, Elu);
} // namespace nvidia_gpu
Expand Down
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/elu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class EluOp : public OperationBase {
Outputs outputTensors,
const Workbuffers& workbuffers) const override;

bool IsCudaGraphCompatible() const override;
CudaGraphCompatibility GetCudaGraphCompatibility() const override;

private:
std::optional<kernel::Elu> kernel_;
Expand Down
Loading

0 comments on commit 9a8412c

Please sign in to comment.