Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add stream function #123

Merged
merged 1 commit into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ You can also install cupoch using pip on Jetson Nano.
Please set up Jetson using [jetpack](https://developer.nvidia.com/embedded/jetpack) and install some packages with apt.

```
sudo apt-get install libxinerama-dev libxcursor-dev libglu1-mesa-dev
sudo apt-get install xorg-dev libxinerama-dev libxcursor-dev libglu1-mesa-dev
pip3 install cupoch
```

Expand Down
22 changes: 18 additions & 4 deletions src/cupoch/registration/kabsch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,21 @@
using namespace cupoch;
using namespace cupoch::registration;

/// Correspondence-based Kabsch using the library-provided streams.
///
/// Obtains streams 0 and 1 from utility::GetStream and delegates to the
/// overload that takes explicit CUDA streams.
Eigen::Matrix4f_u cupoch::registration::Kabsch(
        const utility::device_vector<Eigen::Vector3f> &model,
        const utility::device_vector<Eigen::Vector3f> &target,
        const CorrespondenceSet &corres) {
    const auto first_stream = utility::GetStream(0);
    const auto second_stream = utility::GetStream(1);
    return Kabsch(first_stream, second_stream, model, target, corres);
}

Eigen::Matrix4f_u cupoch::registration::Kabsch(
cudaStream_t stream1, cudaStream_t stream2,
const utility::device_vector<Eigen::Vector3f> &model,
const utility::device_vector<Eigen::Vector3f> &target,
const CorrespondenceSet &corres) {
// Compute the center
auto res1 = thrust::async::reduce(
utility::exec_policy(utility::GetStream(0)),
utility::exec_policy(stream1),
thrust::make_permutation_iterator(
model.begin(),
thrust::make_transform_iterator(
Expand All @@ -51,7 +59,7 @@ Eigen::Matrix4f_u cupoch::registration::Kabsch(
element_get_functor<Eigen::Vector2i, 0>())),
Eigen::Vector3f(0.0, 0.0, 0.0), thrust::plus<Eigen::Vector3f>());
auto res2 = thrust::async::reduce(
utility::exec_policy(utility::GetStream(1)),
utility::exec_policy(stream2),
thrust::make_permutation_iterator(
target.begin(),
thrust::make_transform_iterator(
Expand All @@ -73,7 +81,7 @@ Eigen::Matrix4f_u cupoch::registration::Kabsch(
// Compute the H matrix
const Eigen::Matrix3f init = Eigen::Matrix3f::Zero();
Eigen::Matrix3f hh = thrust::inner_product(
utility::exec_policy(0),
utility::exec_policy(stream1),
thrust::make_permutation_iterator(
model.begin(),
thrust::make_transform_iterator(
Expand Down Expand Up @@ -111,14 +119,20 @@ Eigen::Matrix4f_u cupoch::registration::Kabsch(
return tr;
}

/// Kabsch without an explicit correspondence set, using the library-provided
/// streams.
///
/// Obtains streams 0 and 1 from utility::GetStream and delegates to the
/// stream-taking overload with the same point sets.
Eigen::Matrix4f_u cupoch::registration::Kabsch(
        const utility::device_vector<Eigen::Vector3f> &model,
        const utility::device_vector<Eigen::Vector3f> &target) {
    const auto first_stream = utility::GetStream(0);
    const auto second_stream = utility::GetStream(1);
    return Kabsch(first_stream, second_stream, model, target);
}

/// Kabsch without an explicit correspondence set, running on caller-supplied
/// CUDA streams.
///
/// Builds the identity correspondence set, i.e. the pair (i, i) for every
/// i in [0, model.size()), and forwards it together with both streams to the
/// correspondence-based overload.
///
/// @param stream1 First CUDA stream handed to the correspondence overload.
/// @param stream2 Second CUDA stream handed to the correspondence overload.
/// @param model   Model points; its size fixes the number of correspondences.
/// @param target  Target points.
/// @return The 4x4 matrix produced by the correspondence-based overload.
Eigen::Matrix4f_u cupoch::registration::Kabsch(
        cudaStream_t stream1,
        cudaStream_t stream2,
        const utility::device_vector<Eigen::Vector3f> &model,
        const utility::device_vector<Eigen::Vector3f> &target) {
    CorrespondenceSet corres(model.size());
    thrust::tabulate(corres.begin(), corres.end(), [] __device__(size_t idx) {
        return Eigen::Vector2i(idx, idx);
    });
    // Pass the caller's streams through; returning via the stream-less
    // overload would silently fall back to the library's own streams.
    return Kabsch(stream1, stream2, model, target, corres);
}

Eigen::Matrix4f_u cupoch::registration::KabschWeighted(
Expand Down
9 changes: 9 additions & 0 deletions src/cupoch/registration/kabsch.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,18 @@ namespace registration {
/// Kabsch with an explicit correspondence set, run on the library's own
/// streams: the implementation forwards utility::GetStream(0) and
/// utility::GetStream(1) to the overload that takes explicit CUDA streams.
///
/// @param model  Model points (device memory).
/// @param target Target points (device memory).
/// @param corres Correspondences between the two point sets
///               (presumably (model index, target index) pairs - confirm).
/// @return 4x4 matrix computed by the stream-taking overload.
Eigen::Matrix4f_u Kabsch(const utility::device_vector<Eigen::Vector3f> &model,
const utility::device_vector<Eigen::Vector3f> &target,
const CorrespondenceSet &corres);
/// Kabsch with an explicit correspondence set, run on caller-supplied CUDA
/// streams.
///
/// In the implementation, stream1 is used for the asynchronous reduction over
/// the model points and for the inner product that builds the H matrix, and
/// stream2 is used for the asynchronous reduction over the target points.
///
/// @param stream1 Stream for the model-side reduction and the H matrix.
/// @param stream2 Stream for the target-side reduction.
/// @param model   Model points (device memory).
/// @param target  Target points (device memory).
/// @param corres  Correspondences between the two point sets
///                (presumably (model index, target index) pairs - confirm).
/// @return 4x4 matrix built from the centers and the H matrix.
Eigen::Matrix4f_u Kabsch(cudaStream_t stream1,
cudaStream_t stream2,
const utility::device_vector<Eigen::Vector3f> &model,
const utility::device_vector<Eigen::Vector3f> &target,
const CorrespondenceSet &corres);

/// Kabsch without an explicit correspondence set, run on the library's own
/// streams: the implementation forwards utility::GetStream(0) and
/// utility::GetStream(1) to the stream-taking overload.
///
/// @param model  Model points (device memory).
/// @param target Target points (device memory).
/// @return 4x4 matrix computed by the stream-taking overload.
Eigen::Matrix4f_u Kabsch(const utility::device_vector<Eigen::Vector3f> &model,
const utility::device_vector<Eigen::Vector3f> &target);
/// Kabsch without an explicit correspondence set, run on caller-supplied CUDA
/// streams.
///
/// The implementation builds an identity correspondence set, pairing index i
/// with index i for every i in [0, model.size()), and then calls the
/// correspondence-based overload.
///
/// @param stream1 First CUDA stream.
/// @param stream2 Second CUDA stream.
/// @param model   Model points (device memory); its size sets the number of
///                correspondences.
/// @param target  Target points (device memory).
/// @return 4x4 matrix computed by the correspondence-based overload.
Eigen::Matrix4f_u Kabsch(cudaStream_t stream1,
cudaStream_t stream2,
const utility::device_vector<Eigen::Vector3f> &model,
const utility::device_vector<Eigen::Vector3f> &target);

Eigen::Matrix4f_u KabschWeighted(
const utility::device_vector<Eigen::Vector3f> &model,
Expand Down
10 changes: 10 additions & 0 deletions src/cupoch/utility/eigen.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ template <typename MatType, typename VecType, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(const FuncType &f,
int iteration_num,
bool verbose = true);
/// Stream-taking overload of ComputeJTJandJTr.
///
/// The implementation performs one thrust::transform_reduce over the indices
/// [0, iteration_num) using the execution policy built from `stream`. Each
/// index is mapped through `f` (wrapped in jtj_jtr_functor) and the results
/// are folded into a (JTJ, JTr, r2_sum) tuple that starts out as all zeros.
///
/// @param stream        CUDA stream the reduction is issued on.
/// @param f             Per-index functor supplying the Jacobian data.
/// @param iteration_num Number of indices to reduce over.
/// @param verbose       Verbosity flag (defaults to true).
/// @return Tuple of (MatType, VecType, float).
template <typename MatType, typename VecType, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(cudaStream_t stream,
const FuncType &f,
int iteration_num,
bool verbose = true);

/// Function to compute JTJ and Jtr
/// Input: function pointer f and total number of rows of Jacobian matrix
Expand All @@ -96,6 +101,11 @@ template <typename MatType, typename VecType, int NumJ, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(const FuncType &f,
int iteration_num,
bool verbose = true);
/// Stream-taking overload of the multi-Jacobian ComputeJTJandJTr.
///
/// `f` is taken by const reference so that this declaration names the same
/// function template as the definition in eigen.inl; a non-const reference
/// here would declare a separate overload that has no definition and that
/// cannot accept the const functor forwarded by the stream-less overload.
///
/// @param stream        CUDA stream the reduction is issued on.
/// @param f             Per-index functor supplying NumJ Jacobian rows.
/// @param iteration_num Number of indices to reduce over.
/// @param verbose       Verbosity flag (defaults to true).
/// @return Tuple of (MatType, VecType, float).
template <typename MatType, typename VecType, int NumJ, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(cudaStream_t stream,
                                                        const FuncType &f,
                                                        int iteration_num,
                                                        bool verbose = true);

template <typename MatType,
typename VecType,
Expand Down
19 changes: 19 additions & 0 deletions src/cupoch/utility/eigen.inl
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,28 @@ struct wrapped_calc_weights_functor {

} // namespace


/// Convenience overload without a stream argument.
///
/// Delegates to the stream-taking overload, passing the null stream handle
/// (0) and leaving every other argument untouched.
template <typename MatType, typename VecType, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(const FuncType &f,
                                                        int iteration_num,
                                                        bool verbose) {
    const cudaStream_t null_stream = 0;
    return ComputeJTJandJTr<MatType, VecType, FuncType>(
            null_stream, f, iteration_num, verbose);
}

template <typename MatType, typename VecType, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(cudaStream_t stream,
const FuncType &f,
int iteration_num,
bool verbose) {
MatType JTJ;
VecType JTr;
float r2_sum = 0.0;
JTJ.setZero();
JTr.setZero();
jtj_jtr_functor<MatType, VecType, FuncType> func(f);
auto jtj_jtr_r2 = thrust::transform_reduce(
utility::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(iteration_num), func,
thrust::make_tuple(JTJ, JTr, r2_sum),
Expand All @@ -106,13 +117,21 @@ thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(const FuncType &f,
/// Convenience overload of the multi-Jacobian variant without a stream
/// argument.
///
/// Delegates to the stream-taking overload, passing the null stream handle
/// (0) and leaving every other argument untouched.
template <typename MatType, typename VecType, int NumJ, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(
        const FuncType &f, int iteration_num, bool verbose /*=true*/) {
    const cudaStream_t null_stream = 0;
    return ComputeJTJandJTr<MatType, VecType, NumJ, FuncType>(
            null_stream, f, iteration_num, verbose);
}

template <typename MatType, typename VecType, int NumJ, typename FuncType>
thrust::tuple<MatType, VecType, float> ComputeJTJandJTr(
cudaStream_t stream, const FuncType &f, int iteration_num, bool verbose /*=true*/) {
MatType JTJ;
VecType JTr;
float r2_sum = 0.0;
JTJ.setZero();
JTr.setZero();
multiple_jtj_jtr_functor<MatType, VecType, NumJ, FuncType> func(f);
auto jtj_jtr_r2 = thrust::transform_reduce(
utility::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(iteration_num), func,
thrust::make_tuple(JTJ, JTr, r2_sum),
Expand Down
Loading