Skip to content

Commit

Permalink
Implement NVENC support for KMS and wlgrab capture methods
Browse files Browse the repository at this point in the history
  • Loading branch information
cgutman committed Jan 28, 2024
1 parent 9a3553d commit 8182f59
Show file tree
Hide file tree
Showing 6 changed files with 391 additions and 6 deletions.
266 changes: 266 additions & 0 deletions src/platform/linux/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
*/
#include <bitset>

#include <fcntl.h>

#include <filesystem>

#include <NvFBC.h>
#include <ffnvcodec/dynlink_loader.h>

Expand All @@ -29,6 +33,8 @@ extern "C" {
// Passes the result of CUDA call `x` to check() together with message `y`.
// `y` must be a string literal: ": " is appended through adjacent-literal concatenation.
// The macro expands to the bare check() expression, so no early return happens here.
#define CU_CHECK_IGNORE(x, y) \
check((x), SUNSHINE_STRINGVIEW(y ": "))

namespace fs = std::filesystem;

using namespace std::literals;
namespace cuda {
constexpr auto cudaDevAttrMaxThreadsPerBlock = (CUdevice_attribute) 1;
Expand Down Expand Up @@ -69,6 +75,13 @@ namespace cuda {
CU_CHECK_IGNORE(cdf->cuStreamDestroy(stream), "Couldn't destroy cuda stream");
}

/**
 * @brief Unregisters a graphics resource from CUDA.
 * @param resource Resource handle passed to cuGraphicsUnregisterResource().
 *
 * The call goes through CU_CHECK_IGNORE, so no status is reported back to the caller.
 */
void
unregisterResource(CUgraphicsResource resource) {
CU_CHECK_IGNORE(cdf->cuGraphicsUnregisterResource(resource), "Couldn't unregister resource");
}

// Handle type that pairs a CUgraphicsResource with unregisterResource.
// NOTE(review): presumably util::safe_ptr invokes unregisterResource when the handle is released - confirm.
using registered_resource_t = util::safe_ptr<CUgraphicsResource_st, unregisterResource>;

class img_t: public platf::img_t {
public:
tex_t tex;
Expand Down Expand Up @@ -223,6 +236,236 @@ namespace cuda {
}
};

/**
 * @brief Opens the DRM device associated with the CUDA device index.
 *
 * The CUDA device's PCI bus ID is used to locate the matching
 * `/sys/bus/pci/devices/<id>/drm/card*` entry, and the node of the same
 * name under `/dev/dri` is opened read/write.
 *
 * @param index CUDA device index to open.
 * @return File descriptor wrapper; it holds -1 on failure (CUDA error, missing
 *         or unreadable sysfs directory, no `card*` node, or open() failure).
 */
file_t
open_drm_fd_for_cuda_device(int index) {
  CUdevice device;
  CU_CHECK(cdf->cuDeviceGet(&device, index), "Couldn't get CUDA device");

  // There's no way to directly go from CUDA to a DRM device, so we'll
  // use sysfs to look up the DRM device name from the PCI ID.
  char pci_bus_id[13];
  CU_CHECK(cdf->cuDeviceGetPCIBusId(pci_bus_id, sizeof(pci_bus_id), device), "Couldn't get CUDA device PCI bus ID");
  BOOST_LOG(debug) << "Found CUDA device with PCI bus ID: "sv << pci_bus_id;

  // sysfs names PCI devices with lowercase hexadecimal digits, while the ID
  // reported by CUDA may use uppercase ones. Normalize it so the lookup below
  // doesn't miss devices whose address contains A-F.
  for (std::size_t i = 0; i < sizeof(pci_bus_id) && pci_bus_id[i] != '\0'; ++i) {
    if (pci_bus_id[i] >= 'A' && pci_bus_id[i] <= 'F') {
      pci_bus_id[i] = static_cast<char>(pci_bus_id[i] - 'A' + 'a');
    }
  }

  // Look for the name of the primary node in sysfs
  char sysfs_path[PATH_MAX];
  std::snprintf(sysfs_path, sizeof(sysfs_path), "/sys/bus/pci/devices/%s/drm", pci_bus_id);

  try {
    fs::path sysfs_dir { sysfs_path };
    for (auto &entry : fs::directory_iterator { sysfs_dir }) {
      auto file = entry.path().filename();
      auto filestring = file.generic_u8string();
      if (std::string_view { filestring }.substr(0, 4) != "card"sv) {
        continue;
      }

      BOOST_LOG(debug) << "Found DRM primary node: "sv << filestring;

      fs::path dri_path { "/dev/dri"sv };
      auto device_path = dri_path / file;
      return open(device_path.c_str(), O_RDWR);
    }
  }
  catch (const fs::filesystem_error &err) {
    // directory_iterator throws if the directory is missing or unreadable;
    // report it as an ordinary failure instead of letting the exception escape.
    BOOST_LOG(error) << "Failed to enumerate "sv << sysfs_path << ": "sv << err.what();
    return -1;
  }

  BOOST_LOG(error) << "Unable to find DRM device with PCI bus ID: "sv << pci_bus_id;
  return -1;
}

/**
 * @brief Encode device that converts captured images with GL and copies the
 *        resulting planes into a CUDA hardware frame.
 */
class gl_cuda_vram_t: public platf::avcodec_encode_device_t {
public:
  /**
   * @brief Initialize the GL->CUDA encoding device.
   * @param in_width Width of captured frames.
   * @param in_height Height of captured frames.
   * @param offset_x Offset of content in captured frame.
   * @param offset_y Offset of content in captured frame.
   * @return 0 on success or -1 on failure.
   */
  int
  init(int in_width, int in_height, int offset_x, int offset_y) {
    // This must be non-zero to tell the video core that it's a hardware encoding device.
    data = (void *) 0x1;

    // TODO: Support more than one CUDA device
    file = open_drm_fd_for_cuda_device(0);
    if (file.el < 0) {
      char string[1024];
      BOOST_LOG(error) << "Couldn't open DRM FD for CUDA device: "sv << strerror_r(errno, string, sizeof(string));
      return -1;
    }

    gbm.reset(gbm::create_device(file.el));
    if (!gbm) {
      BOOST_LOG(error) << "Couldn't create GBM device: ["sv << util::hex(eglGetError()).to_string_view() << ']';
      return -1;
    }

    display = egl::make_display(gbm.get());
    if (!display) {
      return -1;
    }

    auto ctx_opt = egl::make_ctx(display.get());
    if (!ctx_opt) {
      return -1;
    }

    ctx = std::move(*ctx_opt);

    width = in_width;
    height = in_height;

    sequence = 0;

    this->offset_x = offset_x;
    this->offset_y = offset_y;

    return 0;
  }

  /**
   * @brief Initialize color conversion into target CUDA frame.
   * @param frame Destination CUDA frame to write into.
   * @param hw_frames_ctx_buf FFmpeg hardware frame context.
   * @return 0 on success or -1 on failure.
   */
  int
  set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx_buf) override {
    this->hwframe.reset(frame);
    this->frame = frame;

    if (!frame->buf[0]) {
      if (av_hwframe_get_buffer(hw_frames_ctx_buf, frame, 0)) {
        BOOST_LOG(error) << "Couldn't get hwframe for CUDA"sv;
        return -1;
      }
    }

    auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data;
    sw_format = hw_frames_ctx->sw_format;

    auto nv12_opt = egl::create_target(frame->width, frame->height, sw_format);
    if (!nv12_opt) {
      return -1;
    }

    auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height, sw_format);
    if (!sws_opt) {
      return -1;
    }

    this->sws = std::move(*sws_opt);
    this->nv12 = std::move(*nv12_opt);

    auto cuda_ctx = (AVCUDADeviceContext *) hw_frames_ctx->device_ctx->hwctx;

    stream = make_stream();
    if (!stream) {
      return -1;
    }

    cuda_ctx->stream = stream.get();

    // Register both GL plane textures with CUDA so convert() can map and read them
    CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY),
      "Couldn't register Y plane texture");
    CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY),
      "Couldn't register UV plane texture");

    return 0;
  }

  /**
   * @brief Convert the captured image into the target CUDA frame.
   * @param img Captured screen image.
   * @return 0 on success or -1 on failure.
   */
  int
  convert(platf::img_t &img) override {
    auto &descriptor = (egl::img_descriptor_t &) img;

    if (descriptor.sequence == 0) {
      // For dummy images, use a blank RGB texture instead of importing a DMA-BUF
      rgb = egl::create_blank(img);
    }
    else if (descriptor.sequence > sequence) {
      sequence = descriptor.sequence;

      rgb = egl::rgb_t {};

      auto rgb_opt = egl::import_source(display.get(), descriptor.sd);

      if (!rgb_opt) {
        return -1;
      }

      rgb = std::move(*rgb_opt);
    }

    // Perform the color conversion and scaling in GL
    sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0]);
    sws.convert(nv12->buf);

    // Resolve the plane layout before mapping so this failure path has nothing to undo
    auto fmt_desc = av_pix_fmt_desc_get(sw_format);
    if (!fmt_desc) {
      BOOST_LOG(error) << "Couldn't get pixel format descriptor for target frame"sv;
      return -1;
    }

    // Map the GL textures to read for CUDA
    CUgraphicsResource resources[2] = { y_res.get(), uv_res.get() };
    CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA");

    // From here on the textures are mapped: never return before the unmap below.
    int status = 0;

    // Copy from the GL textures to the target CUDA frame
    for (int i = 0; i < 2; i++) {
      CUDA_MEMCPY2D cpy = {};
      cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;

      const CUresult array_result = cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0);
      if (array_result != CUDA_SUCCESS) {
        // Report the error, then fall through to the unmap instead of returning while mapped
        CU_CHECK_IGNORE(array_result, "Couldn't get mapped plane array");
        status = -1;
        break;
      }

      cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
      cpy.dstDevice = (CUdeviceptr) frame->data[i];
      cpy.dstPitch = frame->linesize[i];
      // Plane 1 (chroma) is subsampled according to the format's chroma shifts
      cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0);
      cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0);

      CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
    }

    // Unmap the textures to allow modification from GL again
    CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA");
    return status;
  }

  /**
   * @brief Configures shader parameters for the specified colorspace.
   */
  void
  apply_colorspace() override {
    sws.apply_colorspace(colorspace);
  }

  // Declaration order matters below: members are destroyed in reverse order of declaration.
  file_t file;
  gbm::gbm_t gbm;
  egl::display_t display;
  egl::ctx_t ctx;

  // This must be destroyed before display_t
  stream_t stream;
  frame_t hwframe;

  egl::sws_t sws;
  egl::nv12_t nv12;
  AVPixelFormat sw_format { AV_PIX_FMT_NONE };

  // Size of the captured frames as passed to init()
  int width { 0 };
  int height { 0 };

  // Sequence number of the most recently imported capture image
  std::uint64_t sequence { 0 };
  egl::rgb_t rgb;

  // CUDA registrations of nv12->tex[0] and nv12->tex[1]; declared after nv12
  // so they are released before the textures they refer to.
  registered_resource_t y_res;
  registered_resource_t uv_res;

  // Offset of the content inside the captured frame as passed to init()
  int offset_x { 0 };
  int offset_y { 0 };
};

std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram) {
if (init()) {
Expand All @@ -245,6 +488,29 @@ namespace cuda {
return cuda;
}

/**
 * @brief Build the GL->CUDA encoding device that consumes captured dmabufs.
 * @param width Width of captured frames.
 * @param height Height of captured frames.
 * @param offset_x Offset of content in captured frame.
 * @param offset_y Offset of content in captured frame.
 * @return FFmpeg encoding device context, or nullptr if either init step fails.
 */
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_gl_encode_device(int width, int height, int offset_x, int offset_y) {
  // Namespace-level init() has to succeed before a device is created
  const bool cuda_ready = (init() == 0);
  if (!cuda_ready) {
    return nullptr;
  }

  auto gl_device = std::make_unique<gl_cuda_vram_t>();

  const int device_status = gl_device->init(width, height, offset_x, offset_y);
  if (device_status != 0) {
    return nullptr;
  }

  return gl_device;
}

namespace nvfbc {
static PNVFBCCREATEINSTANCE createInstance {};
static NVFBC_API_FUNCTION_LIST func { NVFBC_VERSION };
Expand Down
12 changes: 12 additions & 0 deletions src/platform/linux/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@ namespace cuda {
}
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_encode_device(int width, int height, bool vram);

/**
 * @brief Create a GL->CUDA encoding device for consuming captured dmabufs.
 * @param width Width of captured frames.
 * @param height Height of captured frames.
 * @param offset_x Offset of content in captured frame.
 * @param offset_y Offset of content in captured frame.
 * @return FFmpeg encoding device context, or nullptr on failure.
 */
std::unique_ptr<platf::avcodec_encode_device_t>
make_avcodec_gl_encode_device(int width, int height, int offset_x, int offset_y);

int
init();
} // namespace cuda
Expand Down
65 changes: 65 additions & 0 deletions src/platform/linux/graphics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,71 @@ namespace egl {
return nv12;
}

/**
* @brief Creates biplanar YUV textures to render into.
* @param width Width of the target frame.
* @param height Height of the target frame.
* @param format Format of the target frame.
* @return The new RGB texture.
*/
std::optional<nv12_t>
create_target(int width, int height, AVPixelFormat format) {
nv12_t nv12 {
EGL_NO_DISPLAY,
EGL_NO_IMAGE,
EGL_NO_IMAGE,
gl::tex_t::make(2),
gl::frame_buf_t::make(2),
};

GLint y_format;
GLint uv_format;

// Determine the size of each plane element
auto fmt_desc = av_pix_fmt_desc_get(format);
if (fmt_desc->comp[0].depth <= 8) {
y_format = GL_R8;
uv_format = GL_RG8;
}
else if (fmt_desc->comp[0].depth <= 16) {
y_format = GL_R16;
uv_format = GL_RG16;
}
else {
BOOST_LOG(error) << "Unsupported target pixel format: "sv << format;
return std::nullopt;
}

gl::ctx.BindTexture(GL_TEXTURE_2D, nv12->tex[0]);
gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, y_format, width, height);

gl::ctx.BindTexture(GL_TEXTURE_2D, nv12->tex[1]);
gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, uv_format,
width >> fmt_desc->log2_chroma_w, height >> fmt_desc->log2_chroma_h);

nv12->buf.bind(std::begin(nv12->tex), std::end(nv12->tex));

GLenum attachments[] {
GL_COLOR_ATTACHMENT0,
GL_COLOR_ATTACHMENT1
};

for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) {
gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]);
gl::ctx.DrawBuffers(1, &attachments[x]);

const float y_black[] = { 0.0f, 0.0f, 0.0f, 0.0f };
const float uv_black[] = { 0.5f, 0.5f, 0.5f, 0.5f };
gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black);
}

gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0);

gl_drain_errors;

return nv12;
}

void
sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) {
auto color_p = video::color_vectors_from_colorspace(colorspace);
Expand Down
Loading

0 comments on commit 8182f59

Please sign in to comment.