Skip to content

Commit

Permalink
Avoid loop in case intrinsic coefficients are zero
Browse files Browse the repository at this point in the history
  • Loading branch information
gilpazintel committed Jan 7, 2025
1 parent d26ec5c commit e338df2
Show file tree
Hide file tree
Showing 14 changed files with 26 additions and 68 deletions.
2 changes: 1 addition & 1 deletion CMake/lrs_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ if (NOT APPLE)
else()
option(CHECK_FOR_UPDATES "Checks for versions updates" OFF)
endif()
option(BUILD_WITH_CPU_EXTENSIONS "Enable compiler optimizations using CPU extensions (such as AVX)" OFF)
option(BUILD_WITH_CPU_EXTENSIONS "Enable compiler optimizations using CPU extensions (such as AVX)" ON)
set(UNIT_TESTS_ARGS "" CACHE STRING "Command-line arguments to pass to unit-tests-config.py, e.g. '-t <tag> -r <regex>'")
#Performance improvement with Ubuntu 18/20
if(UNIX AND (NOT ANDROID_NDK_TOOLCHAIN_INCLUDED))
Expand Down
17 changes: 0 additions & 17 deletions examples/measure/rs-measure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include <thread>
#include <atomic>
#include <mutex>
#include <rsutils/easylogging/easyloggingpp.h>

using pixel = std::pair<int, int>;

Expand Down Expand Up @@ -101,7 +100,6 @@ void render_simple_distance(const rs2::depth_frame& depth,

int main(int argc, char * argv[]) try
{
rs2::log_to_console(RS2_LOG_SEVERITY_ERROR);
auto settings = rs2::cli( "rs-measure example" )
.process( argc, argv );

Expand Down Expand Up @@ -293,24 +291,9 @@ float dist_3d(const rs2::depth_frame& frame, pixel u, pixel v)

// Deproject from pixel to point in 3D
rs2_intrinsics intr = frame.get_profile().as<rs2::video_stream_profile>().get_intrinsics(); // Calibration data
// Get the starting time point
auto start = std::chrono::high_resolution_clock::now();

rs2_deproject_pixel_to_point(upoint, &intr, upixel, udist);

// Get the ending time point
auto end = std::chrono::high_resolution_clock::now();

// Calculate the elapsed time in milliseconds
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);

// Output the elapsed time
LOG_ERROR(duration.count());

rs2_deproject_pixel_to_point(vpoint, &intr, vpixel, vdist);



// Calculate euclidean distance between the two points
return sqrt(pow(upoint[0] - vpoint[0], 2.f) +
pow(upoint[1] - vpoint[1], 2.f) +
Expand Down
2 changes: 1 addition & 1 deletion src/image-avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "image-avx.h"

#ifndef ANDROID
#if defined(__SSE4__) && defined(__AVX2__)
#if defined(__SSSE3__) && defined(__AVX2__)
#include <tmmintrin.h> // For SSSE3 intrinsics used in unpack_yuy2_sse
#include <immintrin.h>

Expand Down
2 changes: 1 addition & 1 deletion src/image-avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
namespace librealsense
{
#ifndef ANDROID
#if defined(__SSE4__) && defined(__AVX2__)
#if defined(__SSSE3__) && defined(__AVX2__)
void unpack_yuy2_avx_y8(uint8_t * const d[], const uint8_t * s, int n);
void unpack_yuy2_avx_y16(uint8_t * const d[], const uint8_t * s, int n);
void unpack_yuy2_avx_rgb8(uint8_t * const d[], const uint8_t * s, int n);
Expand Down
13 changes: 2 additions & 11 deletions src/proc/align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#if defined(RS2_USE_CUDA)
#include "proc/cuda/cuda-align.h"
#elif defined(__SSE4__)
#elif defined(__SSSE3__)
#include "proc/sse/sse-align.h"
#endif
#include "proc/neon/neon-align.h"
Expand All @@ -26,7 +26,7 @@ namespace librealsense
{
#if defined(RS2_USE_CUDA)
return std::make_shared<librealsense::align_cuda>(align_to);
#elif defined(__SSE4__)
#elif defined(__SSSE3__)
return std::make_shared<librealsense::align_sse>(align_to);
#elif defined(__ARM_NEON) && ! defined(ANDROID)
return std::make_shared<librealsense::align_neon>(align_to);
Expand All @@ -39,7 +39,6 @@ namespace librealsense
void align_images(const rs2_intrinsics& depth_intrin, const rs2_extrinsics& depth_to_other,
const rs2_intrinsics& other_intrin, GET_DEPTH get_depth, TRANSFER_PIXEL transfer_pixel)
{
auto start = std::chrono::high_resolution_clock::now();
// Iterate over the pixels of the depth image
#pragma omp parallel for schedule(dynamic)
for (int depth_y = 0; depth_y < depth_intrin.height; ++depth_y)
Expand All @@ -52,11 +51,7 @@ namespace librealsense
{
// Map the top-left corner of the depth pixel onto the other image
float depth_pixel[2] = { depth_x - 0.5f, depth_y - 0.5f }, depth_point[3], other_point[3], other_pixel[2];


rs2_deproject_pixel_to_point(depth_point, &depth_intrin, depth_pixel, depth);


rs2_transform_point_to_point(other_point, &depth_to_other, depth_point);
rs2_project_point_to_pixel(other_pixel, &other_intrin, other_point);
const int other_x0 = static_cast<int>(other_pixel[0] + 0.5f);
Expand Down Expand Up @@ -84,10 +79,6 @@ namespace librealsense
}
}
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
// Output the duration
std::cout << duration.count() << "\n";
}

align::align(rs2_stream to_stream) : align(to_stream, "Align")
Expand Down
12 changes: 6 additions & 6 deletions src/proc/color-formats-converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#endif
#ifdef __SSE4__
#ifdef __SSSE3__
#include <tmmintrin.h> // For SSSE3 intrinsics
#endif
#include "neon/image-neon.h"
Expand Down Expand Up @@ -60,7 +60,7 @@ namespace librealsense
rscuda::unpack_yuy2_cuda<FORMAT>(d, s, n);
return;
#endif
#if defined __SSE4__ && ! defined ANDROID
#if defined __SSSE3__ && ! defined ANDROID
static bool do_avx = has_avx();
#ifdef __AVX2__

Expand Down Expand Up @@ -477,7 +477,7 @@ namespace librealsense
}
}

#if defined __SSE4__ && ! defined ANDROID
#if defined __SSSE3__ && ! defined ANDROID
// This method receives 1 line of y and one line of uv.
// source_chunks_y // yyyyyyyyyyyyyyyy
// source_chunks_uv // uvuvuvuvuvuvuvuv
Expand Down Expand Up @@ -631,7 +631,7 @@ namespace librealsense
auto n = width * height;
assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.

#if defined __SSE4__ && ! defined ANDROID
#if defined __SSSE3__ && ! defined ANDROID
static bool do_avx = has_avx();

auto src = reinterpret_cast<const __m128i*>(s);
Expand Down Expand Up @@ -753,7 +753,7 @@ namespace librealsense
m420_parse_one_line<FORMAT>(start_of_second_line, start_of_uv, &dst, width);
}
return;
#endif // __SSE4__
#endif // __SSSE3__
}

void unpack_yuy2(rs2_format dst_format, rs2_stream dst_stream, uint8_t * const d[], const uint8_t * s, int w, int h, int actual_size)
Expand Down Expand Up @@ -822,7 +822,7 @@ namespace librealsense
{
auto n = width * height;
assert(n % 16 == 0); // All currently supported color resolutions are multiples of 16 pixels. Could easily extend support to other resolutions by copying final n<16 pixels into a zero-padded buffer and recursively calling self for final iteration.
#ifdef __SSE4__
#ifdef __SSSE3__
auto src = reinterpret_cast<const __m128i *>(s);
auto dst = reinterpret_cast<__m128i *>(d[0]);
for (; n; n -= 16)
Expand Down
4 changes: 2 additions & 2 deletions src/proc/pointcloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#ifdef RS2_USE_CUDA
#include "proc/cuda/cuda-pointcloud.h"
#endif
#ifdef __SSE4__
#ifdef __SSSE3__
#include "proc/sse/sse-pointcloud.h"
#endif
#include "proc/neon/neon-pointcloud.h"
Expand Down Expand Up @@ -397,7 +397,7 @@ namespace librealsense
{
#ifdef RS2_USE_CUDA
return std::make_shared<librealsense::pointcloud_cuda>();
#elif defined(__SSE4__)
#elif defined(__SSSE3__)
return std::make_shared<librealsense::pointcloud_sse>();
#elif defined(__ARM_NEON) && ! defined ANDROID
return std::make_shared<librealsense::pointcloud_neon>();
Expand Down
2 changes: 1 addition & 1 deletion src/proc/sse/sse-align.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2019 Intel Corporation. All Rights Reserved.
#ifdef __SSE4__
#ifdef __SSSE3__

#include "sse-align.h"
#include <tmmintrin.h> // For SSSE3 intrinsics used in unpack_yuy2_sse
Expand Down
4 changes: 2 additions & 2 deletions src/proc/sse/sse-align.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2024 Intel Corporation. All Rights Reserved.
#pragma once
#ifdef __SSE4__
#ifdef __SSSE3__

#include "proc/align.h"
#include <src/float3.h>
Expand Down Expand Up @@ -87,4 +87,4 @@ namespace librealsense
std::shared_ptr<image_transform> _stream_transform;
};
}
#endif // __SSE4__
#endif // __SSSE3__
6 changes: 3 additions & 3 deletions src/proc/sse/sse-pointcloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#include <iostream>

#ifdef __SSE4__
#ifdef __SSSE3__

#include <tmmintrin.h> // For SSSE3 intrinsics

Expand Down Expand Up @@ -56,7 +56,7 @@ namespace librealsense
const rs2_intrinsics &depth_intrinsics,
const rs2::depth_frame& depth_frame)
{
#ifdef __SSE4__
#ifdef __SSSE3__

auto depth_image = (const uint16_t*)depth_frame.get_data();

Expand Down Expand Up @@ -145,7 +145,7 @@ namespace librealsense
{
auto tex_ptr = texture_map;

#ifdef __SSE4__
#ifdef __SSSE3__
auto point = reinterpret_cast<const float*>(points);
auto res = reinterpret_cast<float*>(tex_ptr);
auto res1 = reinterpret_cast<float*>(pixels_ptr);
Expand Down
6 changes: 3 additions & 3 deletions src/proc/y411-converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#ifdef RS2_USE_CUDA
#include "cuda/cuda-conversion.cuh"
#endif
#ifdef __SSE4__
#ifdef __SSSE3__
#include <tmmintrin.h> // For SSSE3 intrinsics
#endif

Expand Down Expand Up @@ -44,7 +44,7 @@ namespace librealsense
// See https://www.fourcc.org/pixel-format/yuv-y411/
//

#if defined __SSE4__ && ! defined ANDROID
#if defined __SSSE3__ && ! defined ANDROID
void unpack_y411_sse( uint8_t * const dest, const uint8_t * const s, int w, int h, int actual_size)
{
auto n = w * h;
Expand Down Expand Up @@ -297,7 +297,7 @@ namespace librealsense
// The size of the frame must be bigger than 4 pixels and product of 32
void unpack_y411( uint8_t * const dest[], const uint8_t * const s, int w, int h, int actual_size )
{
#if defined __SSE4__ && ! defined ANDROID
#if defined __SSSE3__ && ! defined ANDROID
unpack_y411_sse(dest[0], s, w, h, actual_size);
#else
unpack_y411_native(dest[0], s, w, h, actual_size);
Expand Down
2 changes: 1 addition & 1 deletion src/proc/y411-converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace librealsense

void unpack_y411( uint8_t * const dest[], const uint8_t * const s, int w, int h, int actual_size);

#if defined __SSE4__ && ! defined ANDROID
#if defined __SSSE3__ && ! defined ANDROID
void unpack_y411_sse( uint8_t * const dest, const uint8_t * const s, int w, int h, int actual_size);
#endif

Expand Down
18 changes: 1 addition & 17 deletions src/rs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,6 @@
#include <src/core/time-service.h>
#include <rsutils/string/from.h>

#include <iostream>
#include <chrono>
#include <thread> // For std::this_thread::sleep_for
#include <random> // For random number generation

////////////////////////
// API implementation //
////////////////////////
Expand Down Expand Up @@ -4113,7 +4108,6 @@ NOEXCEPT_RETURN(, pixel)
/* Helper inner function (not part of the API) */
inline bool is_intrinsics_distortion_zero(const struct rs2_intrinsics* intrin)
{
//return false;
return (abs(intrin->coeffs[0]) < std::numeric_limits<double>::epsilon() && abs(intrin->coeffs[1]) < std::numeric_limits<double>::epsilon() &&
abs(intrin->coeffs[2]) < std::numeric_limits<double>::epsilon() && abs(intrin->coeffs[3]) < std::numeric_limits<double>::epsilon() &&
abs(intrin->coeffs[4]) < std::numeric_limits<double>::epsilon());
Expand Down Expand Up @@ -4150,10 +4144,9 @@ void rs2_deproject_pixel_to_point(float point[3], const struct rs2_intrinsics* i
}
if (intrin->model == RS2_DISTORTION_BROWN_CONRADY)
{
int i = 0;
// need to loop until convergence
// 10 iterations determined empirically
for (; i < 10; i++)
for (int i = 0; i < 10; i++)
{
float r2 = x * x + y * y;
float icdist = (float)1 / (float)(1 + ((intrin->coeffs[4] * r2 + intrin->coeffs[1]) * r2 + intrin->coeffs[0]) * r2);
Expand Down Expand Up @@ -4205,15 +4198,6 @@ void rs2_deproject_pixel_to_point(float point[3], const struct rs2_intrinsics* i
point[0] = depth * x;
point[1] = depth * y;
point[2] = depth;

// Get the ending time point
//auto end = std::chrono::high_resolution_clock::now();

// Calculate the elapsed time in milliseconds
//auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);

// Output the elapsed time
//std::cout << duration.count() << std::endl;
}
NOEXCEPT_RETURN(, point)

Expand Down
4 changes: 2 additions & 2 deletions wrappers/opencv/depth-filter/downsample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include <assert.h>

#ifdef __SSE4__
#ifdef __SSSE3__
#include <emmintrin.h>
#include <smmintrin.h>
#endif
Expand All @@ -24,7 +24,7 @@ void downsample_min_4x4(const cv::Mat& source, cv::Mat* pDest)

const size_t sizeYresized = source.rows / DOWNSAMPLE_FACTOR;

#ifdef __SSE4__
#ifdef __SSSE3__
__m128i ones = _mm_set1_epi16(1);

// Note on multi-threading here, 2018-08-17
Expand Down

0 comments on commit e338df2

Please sign in to comment.