Skip to content

Commit

Permalink
Remove commented code
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrii Pavliuk committed Nov 7, 2023
1 parent 6da3886 commit e48c892
Show file tree
Hide file tree
Showing 11 changed files with 15 additions and 663 deletions.
49 changes: 2 additions & 47 deletions modules/nvidia_plugin/src/cuda/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,20 +132,6 @@ TransferNode CaptureInfo::addTransferNode(CUDA::DevicePointer<void *> dst,
return TransferNode{newNode, dst, src, size};
}

// InsertNode CaptureInfo::addInsertNode(std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) {
// cudaGraphNode_t newNode;
// throwIfError(cudaGraphAddKernelNode(&newNode, capturingGraph_, deps_, depCount_, insertParams->getKnp()));
// throwIfError(cudaStreamUpdateCaptureDependencies(stream_.get(), &newNode, 1, 1));
// return InsertNode{newNode, std::move(insertParams)};
// }

// SliceNode CaptureInfo::addSliceNode(std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) {
// cudaGraphNode_t newNode;
// throwIfError(cudaGraphAddKernelNode(&newNode, capturingGraph_, deps_, depCount_, sliceParams->getKnp()));
// throwIfError(cudaStreamUpdateCaptureDependencies(stream_.get(), &newNode, 1, 1));
// return SliceNode{newNode, std::move(sliceParams)};
// }

KernelNode CaptureInfo::addKernelNode(const cudaKernelNodeParams& knp) {
cudaGraphNode_t newNode;
throwIfError(cudaGraphAddKernelNode(&newNode, capturingGraph_, deps_, depCount_, &knp));
Expand All @@ -161,8 +147,7 @@ void UploadNode::update_src(const GraphExec& exec, const void *src) {
}
}

UploadNode::UploadNode(cudaGraphNode_t node, DevicePointer<void*> dst, const void *src,
std::size_t size)
UploadNode::UploadNode(cudaGraphNode_t node, DevicePointer<void*> dst, const void *src, std::size_t size)
: node_{node},
dst_{dst},
src_{src},
Expand All @@ -177,8 +162,7 @@ void DownloadNode::update_dst(const GraphExec& exec, void *dst) {
}
}

DownloadNode::DownloadNode(cudaGraphNode_t node, void *dst, DevicePointer<const void*> src,
std::size_t size)
DownloadNode::DownloadNode(cudaGraphNode_t node, void *dst, DevicePointer<const void*> src, std::size_t size)
: node_{node},
dst_{dst},
src_{src},
Expand All @@ -201,28 +185,7 @@ CUDA::TransferNode::TransferNode(cudaGraphNode_t node, CUDA::DevicePointer<void
size_{size} {
}

// void CUDA::InsertNode::update_params(const GraphExec &exec, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) {
// insert_params_ = std::move(insertParams);
// throwIfError(cudaGraphExecKernelNodeSetParams(exec.get(), node_, insert_params_->getKnp()));
// }

// CUDA::InsertNode::InsertNode(cudaGraphNode_t node, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams)
// : node_{node},
// insert_params_{std::move(insertParams)} {
// }

// void CUDA::SliceNode::update_params(const GraphExec &exec, std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) {
// slice_params_ = std::move(sliceParams);
// throwIfError(cudaGraphExecKernelNodeSetParams(exec.get(), node_, slice_params_->getKnp()));
// }

// CUDA::SliceNode::SliceNode(cudaGraphNode_t node, std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams)
// : node_{node},
// slice_params_{std::move(sliceParams)} {
// }

CUDA::KernelNode::KernelNode(cudaGraphNode_t node, const cudaKernelNodeParams &knp) : node_{node}, knp_{&knp} {}
// CUDA::KernelNode::KernelNode(cudaGraphNode_t node) : node_{node} {}

bool UploadNode::operator==(const UploadNode &rhs) const {
return size_ == rhs.size_ && src_ == rhs.src_ && dst_.get() == rhs.dst_.get() && node_ == rhs.node_;
Expand All @@ -236,12 +199,4 @@ bool CUDA::TransferNode::operator==(const TransferNode &rhs) const {
return size_ == rhs.size_ && src_.get() == rhs.src_.get() && dst_.get() == rhs.dst_.get() && node_ == rhs.node_;
}

// bool CUDA::InsertNode::operator==(const InsertNode &rhs) const {
// return insert_params_ == rhs.insert_params_ && node_ == rhs.node_;
// }

// bool CUDA::SliceNode::operator==(const SliceNode &rhs) const {
// return slice_params_ == rhs.slice_params_ && node_ == rhs.node_;
// }

} // namespace CUDA
41 changes: 0 additions & 41 deletions modules/nvidia_plugin/src/cuda/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,54 +131,15 @@ class TransferNode {
std::size_t size_;
};

// class InsertNode {
// friend CaptureInfo;

// public:
// // InsertNode() = default;
// // InsertNode(const InsertNode&) = default;
// // InsertNode(InsertNode&&) = default;
// void update_params(const GraphExec& exec, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams);
// // bool operator==(const InsertNode& rhs) const;

// private:
// InsertNode(cudaGraphNode_t node, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> kernelParams);
// cudaGraphNode_t node_;
// std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insert_params_;
// // cudaKernelNodeParams knp_;
// };

// class SliceNode {
// friend CaptureInfo;

// public:
// // SliceNode() = default;
// // SliceNode(const SliceNode&) = default;
// // SliceNode(SliceNode&&) = default;
// void update_params(const GraphExec& exec, std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams);
// // bool operator==(const InsertNode& rhs) const;

// private:
// SliceNode(cudaGraphNode_t node, std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> kernelParams);
// cudaGraphNode_t node_;
// // std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> slice_params_;
// // cudaKernelNodeParams knp_;
// };

class KernelNode {
friend CaptureInfo;

public:
// void update_params(const GraphExec& exec);
// void KernelNode::update_params(const GraphExec &exec) {
inline void update_params(const GraphExec& exec, const cudaKernelNodeParams& knp) {
// slice_params_ = std::move(sliceParams);
knp_ = &knp;
throwIfError(cudaGraphExecKernelNodeSetParams(exec.get(), node_, knp_));
}

// bool operator==(const InsertNode& rhs) const;

private:
KernelNode(cudaGraphNode_t node, const cudaKernelNodeParams& knp);
cudaGraphNode_t node_;
Expand All @@ -191,8 +152,6 @@ class CaptureInfo {
UploadNode addUploadNode(CUDA::DevicePointer<void*> dst, const void* src, std::size_t size);
DownloadNode addDownloadNode(void* dst, CUDA::DevicePointer<const void*> src, std::size_t size);
TransferNode addTransferNode(CUDA::DevicePointer<void*> dst, CUDA::DevicePointer<const void*> src, std::size_t size);
// InsertNode addInsertNode(std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams);
// SliceNode addSliceNode(std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams);
KernelNode addKernelNode(const cudaKernelNodeParams& knp);

private:
Expand Down
70 changes: 0 additions & 70 deletions modules/nvidia_plugin/src/cuda_graph_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,7 @@ void CudaGraphContext::start_next_graph_addition() {
}

void CudaGraphContext::start_ti_graph_addition(const std::string& ti_op_name) {
// OPENVINO_ASSERT(!ti_graphs_.is_initialized(), "Only one TI graph supported");
// ti_graphs_.emplace();
ti_graphs_[ti_op_name] = {};
// OPENVINO_ASSERT(ti_graphs_.get_transfers_count() == 0 &&
// ti_graphs_.get_slices_count() == 0 &&
// ti_graphs_.get_inserts_count() == 0,
// "ti_graphs_ hasn't been reset properly");
}

void CudaGraphContext::add_parameter(const std::string& tensorName,
Expand All @@ -50,37 +44,21 @@ void CudaGraphContext::add_transfer(const std::string& ti_op_name,
CUDA::DevicePointer<void*> dst,
CUDA::DevicePointer<const void*> src,
std::size_t size) {
// OPENVINO_ASSERT(ti_graphs_.is_initialized(), "TI graph not initialized");
ti_graphs_.at(ti_op_name).add_transfer(stream, dst, src, size);
}

// void CudaGraphContext::add_slice(const std::string& ti_op_name,
// const CUDA::Stream& stream,
// std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) {
// // OPENVINO_ASSERT(ti_graphs_.is_initialized(), "TI graph not initialized");
// ti_graphs_.at(ti_op_name).add_slice(stream, std::move(sliceParams));
// }

void CudaGraphContext::add_kernel(const std::string& ti_op_name,
const CUDA::Stream& stream,
const cudaKernelNodeParams& knp) {
ti_graphs_.at(ti_op_name).add_kernel(stream, knp);
}

// void CudaGraphContext::add_insert(const std::string& ti_op_name,
// const CUDA::Stream& stream,
// std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) {
// // OPENVINO_ASSERT(ti_graphs_.is_initialized(), "TI graph not initialized");
// ti_graphs_.at(ti_op_name).add_insert(stream, std::move(insertParams));
// }

void CudaGraphContext::add_graph(const CUDA::Graph& graph) {
OPENVINO_ASSERT(currentGraphIndex_ < graphs_.size(), "Graph index/vector size incosistency");
graphs_[currentGraphIndex_].set_graph(graph);
}

void CudaGraphContext::add_ti_graph(const std::string& ti_op_name, const CUDA::Graph& graph) {
// OPENVINO_ASSERT(ti_graphs_.is_initialized(), "TI graph not initialized");
ti_graphs_.at(ti_op_name).set_graph(graph);
}

Expand All @@ -95,27 +73,13 @@ void CudaGraphContext::update_capture(const TensorMappingContext& context) {
}
}

// void CudaGraphContext::update_slice(const std::string& ti_op_name,
// std::size_t index,
// std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) const {
// OPENVINO_ASSERT(ti_graphs_.at(ti_op_name).is_initialized(), "TI graph not initialized");
// ti_graphs_.at(ti_op_name).update_slice(index, std::move(sliceParams));
// }

void CudaGraphContext::update_kernel(const std::string& ti_op_name,
std::size_t index,
const cudaKernelNodeParams& knp) const {
OPENVINO_ASSERT(ti_graphs_.at(ti_op_name).is_initialized(), "TI graph not initialized");
ti_graphs_.at(ti_op_name).update_kernel(index, knp);
}

// void CudaGraphContext::update_insert(const std::string& ti_op_name,
// std::size_t index,
// std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) const {
// OPENVINO_ASSERT(ti_graphs_.at(ti_op_name).is_initialized(), "TI graph not initialized");
// ti_graphs_.at(ti_op_name).update_insert(index, std::move(insertParams));
// }

void CudaGraphContext::launch(std::size_t index, const CUDA::Stream& stream) const {
currentGraphIndex_ = index;
OPENVINO_ASSERT(currentGraphIndex_ < graphs_.size(), "Graph index/vector size incosistency");
Expand Down Expand Up @@ -147,19 +111,10 @@ std::size_t CudaGraphContext::get_transfers_count(const std::string& ti_op_name)
return ti_graphs_.at(ti_op_name).get_transfers_count();
}


// std::size_t CudaGraphContext::get_slices_count(const std::string& ti_op_name) const {
// return ti_graphs_.at(ti_op_name).get_slices_count();
// }

std::size_t CudaGraphContext::get_kernels_count(const std::string& ti_op_name) const {
return ti_graphs_.at(ti_op_name).get_kernels_count();
}

// std::size_t CudaGraphContext::get_inserts_count(const std::string& ti_op_name) const {
// return ti_graphs_.at(ti_op_name).get_inserts_count();
// }

std::size_t CudaGraphContext::get_graphs_count() const {
return graphs_.size();
}
Expand Down Expand Up @@ -190,23 +145,11 @@ void CudaGraphContext::CudaGraphInfo::add_transfer(const CUDA::Stream& stream,
transferNodes_.emplace_back(captureInfo.addTransferNode(dst, src, size));
}

// void CudaGraphContext::CudaGraphInfo::add_slice(const CUDA::Stream& stream,
// std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) {
// CUDA::CaptureInfo captureInfo{stream};
// sliceNodes_.emplace_back(captureInfo.addSliceNode(std::move(sliceParams)));
// }

void CudaGraphContext::CudaGraphInfo::add_kernel(const CUDA::Stream& stream, const cudaKernelNodeParams& knp) {
CUDA::CaptureInfo captureInfo{stream};
kernelNodes_.emplace_back(captureInfo.addKernelNode(knp));
}

// void CudaGraphContext::CudaGraphInfo::add_insert(const CUDA::Stream& stream,
// std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) {
// CUDA::CaptureInfo captureInfo{stream};
// insertNodes_.emplace_back(captureInfo.addInsertNode(std::move(insertParams)));
// }

void CudaGraphContext::CudaGraphInfo::set_graph(const CUDA::Graph& graph) {
graph_.emplace(graph);
graphExec_.emplace(graph);
Expand All @@ -223,20 +166,10 @@ void CudaGraphContext::CudaGraphInfo::update_capture(const TensorMappingContext&
}
}

// void CudaGraphContext::CudaGraphInfo::update_slice(std::size_t index,
// std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) {
// sliceNodes_[index].update_params(graphExec_.value(), std::move(sliceParams));
// }

void CudaGraphContext::CudaGraphInfo::update_kernel(std::size_t index, const cudaKernelNodeParams& knp) {
kernelNodes_[index].update_params(graphExec_.value(), knp);
}

// void CudaGraphContext::CudaGraphInfo::update_insert(
// std::size_t index, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) {
// insertNodes_[index].update_params(graphExec_.value(), std::move(insertParams));
// }

void CudaGraphContext::CudaGraphInfo::launch(const CUDA::Stream& stream) const { graphExec_.value().launch(stream); }

std::size_t CudaGraphContext::CudaGraphInfo::get_params_count() const { return parameterNodes_.size(); }
Expand All @@ -245,11 +178,8 @@ std::size_t CudaGraphContext::CudaGraphInfo::get_results_count() const { return

std::size_t CudaGraphContext::CudaGraphInfo::get_transfers_count() const { return transferNodes_.size(); }

// std::size_t CudaGraphContext::CudaGraphInfo::get_slices_count() const { return sliceNodes_.size(); }
std::size_t CudaGraphContext::CudaGraphInfo::get_kernels_count() const { return kernelNodes_.size(); }

// std::size_t CudaGraphContext::CudaGraphInfo::get_inserts_count() const { return insertNodes_.size(); }

bool operator==(const CudaGraphContext::CudaGraphInfo& lhs, const CudaGraphContext::CudaGraphInfo& rhs) {
return lhs.graph_ == rhs.graph_ && lhs.graphExec_ == rhs.graphExec_ && lhs.parameterNodes_ == rhs.parameterNodes_ &&
lhs.resultNodes_ == rhs.resultNodes_;
Expand Down
29 changes: 0 additions & 29 deletions modules/nvidia_plugin/src/cuda_graph_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,19 @@ class CudaGraphContext {
CUDA::DevicePointer<const void*> src,
std::size_t size);

// void add_slice(const std::string& ti_op_name,
// const CUDA::Stream& stream,
// std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams);

void add_kernel(const std::string& ti_op_name,
const CUDA::Stream& stream,
const cudaKernelNodeParams& knp);

// void add_insert(const std::string& ti_op_name,
// const CUDA::Stream& stream,
// std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams);

void add_graph(const CUDA::Graph& graph);
void add_ti_graph(const std::string& ti_op_name, const CUDA::Graph& graph);

bool is_initialized() const;

void update_capture(const TensorMappingContext& context);
// void update_slice(const std::string& ti_op_name,
// std::size_t index,
// std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams) const;
void update_kernel(const std::string& ti_op_name,
std::size_t index,
const cudaKernelNodeParams& knp) const;
// void update_insert(const std::string& ti_op_name,
// std::size_t index,
// std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams) const;

void launch(std::size_t index, const CUDA::Stream& stream) const;
void launch_ti_graph(const std::string& ti_op_name, const CUDA::Stream& stream) const;
Expand All @@ -72,9 +58,7 @@ class CudaGraphContext {
std::size_t get_results_count() const;

std::size_t get_transfers_count(const std::string& ti_op_name) const;
// std::size_t get_slices_count(const std::string& ti_op_name) const;
std::size_t get_kernels_count(const std::string& ti_op_name) const;
// std::size_t get_inserts_count(const std::string& ti_op_name) const;

std::size_t get_graphs_count() const;

Expand All @@ -84,11 +68,6 @@ class CudaGraphContext {
private:
class CudaGraphInfo {
public:
// // TODO: think about this
// CudaGraphInfo() = default;
// CudaGraphInfo(const CudaGraphInfo&) = default;
// CudaGraphInfo(CudaGraphInfo&&) = default;

void add_parameter(const std::string& tensorName,
const CUDA::Stream& stream,
CUDA::DevicePointer<void*> dst,
Expand All @@ -106,28 +85,22 @@ class CudaGraphContext {
CUDA::DevicePointer<const void*> src,
std::size_t size);

// void add_slice(const CUDA::Stream& stream, std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams);
void add_kernel(const CUDA::Stream& stream, const cudaKernelNodeParams& knp);
// void add_insert(const CUDA::Stream& stream, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams);

void set_graph(const CUDA::Graph& graph);

bool is_initialized() const;

void update_capture(const TensorMappingContext& context);
// void update_slice(std::size_t index, std::unique_ptr<ov::nvidia_gpu::kernel::Slice::Params> sliceParams);
void update_kernel(std::size_t index, const cudaKernelNodeParams& knp);
// void update_insert(std::size_t index, std::unique_ptr<ov::nvidia_gpu::kernel::Insert::Params> insertParams);

void launch(const CUDA::Stream& stream) const;

std::size_t get_params_count() const;
std::size_t get_results_count() const;

std::size_t get_transfers_count() const;
// std::size_t get_slices_count() const;
std::size_t get_kernels_count() const;
// std::size_t get_inserts_count() const;

friend bool operator==(const CudaGraphInfo& lhs, const CudaGraphInfo& rhs);
friend bool operator!=(const CudaGraphInfo& lhs, const CudaGraphInfo& rhs);
Expand All @@ -139,9 +112,7 @@ class CudaGraphContext {
std::map<std::string, CUDA::DownloadNode> resultNodes_;

std::vector<CUDA::TransferNode> transferNodes_;
// std::vector<CUDA::SliceNode> sliceNodes_;
std::vector<CUDA::KernelNode> kernelNodes_;
// std::vector<CUDA::InsertNode> insertNodes_;
};

friend bool operator==(const CudaGraphInfo& lhs, const CudaGraphInfo& rhs);
Expand Down
Loading

0 comments on commit e48c892

Please sign in to comment.