From d9faa73cbbc186d7dd0dbfce0589012a0bed9f17 Mon Sep 17 00:00:00 2001 From: Jan Grulich Date: Fri, 17 Mar 2023 10:58:10 +0100 Subject: [PATCH] PipeWire capturer: import DMABufs directly into desktop frame Originally DMABufs were imported into a temporary buffer followed by a copy operation into the desktop frame itself. This is not needed as we can import them directly into desktop frames and avoid this overhead. Also drop support for MemPtr buffers as both Mutter and KWin don't seem to support them and they are going to be too slow anyway. Testing with latest Chromium, I could see two processes with usage around 20% and 40% without this change going down to 10% and 20% with this change applied. Bug: webrtc:13429 Bug: chrome:1378258 Change-Id: Ice3292528ff56300931c8638f8e03d4883d5e331 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/297501 Reviewed-by: Alexander Cooper Commit-Queue: Jan Grulich Cr-Commit-Position: refs/heads/main@{#39594} --- diff --git a/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc b/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc index 5bbd5d7aba..b529077c6d 100644 --- a/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc +++ b/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.cc @@ -101,11 +101,23 @@ typedef void (*glDeleteTextures_func)(GLsizei n, const GLuint* textures); typedef void (*glGenTextures_func)(GLsizei n, GLuint* textures); typedef GLenum (*glGetError_func)(void); typedef const GLubyte* (*glGetString_func)(GLenum name); -typedef void (*glGetTexImage_func)(GLenum target, - GLint level, - GLenum format, - GLenum type, - void* pixels); +typedef void (*glReadPixels_func)(GLint x, + GLint y, + GLsizei width, + GLsizei height, + GLenum format, + GLenum type, + void* data); +typedef void (*glGenFramebuffers_func)(GLsizei n, GLuint* ids); +typedef void (*glDeleteFramebuffers_func)(GLsizei n, + const GLuint* framebuffers); +typedef void (*glBindFramebuffer_func)(GLenum target, GLuint framebuffer); +typedef void (*glFramebufferTexture2D_func)(GLenum target, + GLenum attachment, + GLenum textarget, + GLuint texture, + GLint level); +typedef GLenum (*glCheckFramebufferStatus_func)(GLenum target); typedef void (*glTexParameteri_func)(GLenum target, GLenum pname, GLint param); typedef void* (*glXGetProcAddressARB_func)(const char*); @@ -118,7 +130,12 @@ glDeleteTextures_func GlDeleteTextures = nullptr; glGenTextures_func GlGenTextures = nullptr; glGetError_func GlGetError = nullptr; glGetString_func GlGetString = nullptr; -glGetTexImage_func GlGetTexImage = nullptr; +glReadPixels_func GlReadPixels = nullptr; +glGenFramebuffers_func GlGenFramebuffers = nullptr; +glDeleteFramebuffers_func GlDeleteFramebuffers = nullptr; +glBindFramebuffer_func GlBindFramebuffer = nullptr; +glFramebufferTexture2D_func GlFramebufferTexture2D = nullptr; +glCheckFramebufferStatus_func GlCheckFramebufferStatus = nullptr; glTexParameteri_func GlTexParameteri = nullptr; glXGetProcAddressARB_func GlXGetProcAddressARB = nullptr; @@ -279,12 +296,26 @@ static bool LoadGL() { (glDeleteTextures_func)GlXGetProcAddressARB("glDeleteTextures"); GlGenTextures = (glGenTextures_func)GlXGetProcAddressARB("glGenTextures"); GlGetError = (glGetError_func)GlXGetProcAddressARB("glGetError"); - GlGetTexImage = (glGetTexImage_func)GlXGetProcAddressARB("glGetTexImage"); + GlReadPixels = (glReadPixels_func)GlXGetProcAddressARB("glReadPixels"); + GlGenFramebuffers = + (glGenFramebuffers_func)GlXGetProcAddressARB("glGenFramebuffers"); + GlDeleteFramebuffers = + (glDeleteFramebuffers_func)GlXGetProcAddressARB("glDeleteFramebuffers"); + GlBindFramebuffer = + (glBindFramebuffer_func)GlXGetProcAddressARB("glBindFramebuffer"); + GlFramebufferTexture2D = (glFramebufferTexture2D_func)GlXGetProcAddressARB( + "glFramebufferTexture2D"); + GlCheckFramebufferStatus = + (glCheckFramebufferStatus_func)GlXGetProcAddressARB( + "glCheckFramebufferStatus"); + GlTexParameteri = (glTexParameteri_func)GlXGetProcAddressARB("glTexParameteri"); return GlBindTexture && GlDeleteTextures && GlGenTextures && GlGetError && - GlGetTexImage && GlTexParameteri; + GlReadPixels && GlGenFramebuffers && GlDeleteFramebuffers && + GlBindFramebuffer && GlFramebufferTexture2D && + GlCheckFramebufferStatus && GlTexParameteri; } return false; @@ -435,6 +466,14 @@ EglDmaBuf::~EglDmaBuf() { EglTerminate(egl_.display); } + if (fbo_) { + GlDeleteFramebuffers(1, &fbo_); + } + + if (texture_) { + GlDeleteTextures(1, &texture_); + } + // BUG: crbug.com/1290566 // Closing libEGL.so.1 when using NVidia drivers causes a crash // when EglGetPlatformDisplayEXT() is used, at least this one is enough @@ -466,20 +505,20 @@ bool EglDmaBuf::GetClientExtensions(EGLDisplay dpy, EGLint name) { } RTC_NO_SANITIZE("cfi-icall") -std::unique_ptr EglDmaBuf::ImageFromDmaBuf( - const DesktopSize& size, - uint32_t format, - const std::vector& plane_datas, - uint64_t modifier) { - std::unique_ptr src; - +bool EglDmaBuf::ImageFromDmaBuf(const DesktopSize& size, + uint32_t format, + const std::vector& plane_datas, + uint64_t modifier, + const DesktopVector& offset, + const DesktopSize& buffer_size, + uint8_t* data) { if (!egl_initialized_) { - return src; + return false; } if (plane_datas.size() <= 0) { RTC_LOG(LS_ERROR) << "Failed to process buffer: invalid number of planes"; - return src; + return false; } EGLint attribs[47]; @@ -568,20 +607,32 @@ std::unique_ptr EglDmaBuf::ImageFromDmaBuf( if (image == EGL_NO_IMAGE) { RTC_LOG(LS_ERROR) << "Failed to record frame: Error creating EGLImage - " << FormatEGLError(EglGetError()); - return src; + return false; } // create GL 2D texture for framebuffer - GLuint texture; - GlGenTextures(1, &texture); - GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - GlBindTexture(GL_TEXTURE_2D, texture); + if (!texture_) { + GlGenTextures(1, &texture_); + GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + GlTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + GlBindTexture(GL_TEXTURE_2D, texture_); GlEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image); - src = std::make_unique(plane_datas[0].stride * size.height()); + if (!fbo_) { + GlGenFramebuffers(1, &fbo_); + } + + GlBindFramebuffer(GL_FRAMEBUFFER, fbo_); + GlFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture_, 0); + if (GlCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + RTC_LOG(LS_ERROR) << "Failed to bind DMA buf framebuffer"; + EglDestroyImageKHR(egl_.display, image); + return false; + } GLenum gl_format = GL_BGRA; switch (format) { @@ -598,17 +649,18 @@ std::unique_ptr EglDmaBuf::ImageFromDmaBuf( gl_format = GL_BGRA; break; } - GlGetTexImage(GL_TEXTURE_2D, 0, gl_format, GL_UNSIGNED_BYTE, src.get()); - if (GlGetError()) { + GlReadPixels(offset.x(), offset.y(), buffer_size.width(), + buffer_size.height(), gl_format, GL_UNSIGNED_BYTE, data); + + const GLenum error = GlGetError(); + if (error) { RTC_LOG(LS_ERROR) << "Failed to get image from DMA buffer."; - return src; } - GlDeleteTextures(1, &texture); EglDestroyImageKHR(egl_.display, image); - return src; + return !error; } RTC_NO_SANITIZE("cfi-icall") diff --git a/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.h b/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.h index f1d96b2f80..22a8f5ab52 100644 --- a/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.h +++ b/third_party/libwebrtc/modules/desktop_capture/linux/wayland/egl_dmabuf.h @@ -41,11 +41,15 @@ class EglDmaBuf { EglDmaBuf(); ~EglDmaBuf(); - std::unique_ptr ImageFromDmaBuf( - const DesktopSize& size, - uint32_t format, - const std::vector& plane_datas, - uint64_t modifiers); + // Returns whether the image was successfully imported from + // given DmaBuf and its parameters + bool ImageFromDmaBuf(const DesktopSize& size, + uint32_t format, + const std::vector& plane_datas, + uint64_t modifiers, + const DesktopVector& offset, + const DesktopSize& buffer_size, + uint8_t* data); std::vector QueryDmaBufModifiers(uint32_t format); bool IsEglInitialized() const { return egl_initialized_; } @@ -58,6 +62,8 @@ class EglDmaBuf { int32_t drm_fd_ = -1; // for GBM buffer mmap gbm_device* gbm_device_ = nullptr; // for passed GBM buffer retrieval + GLuint fbo_ = 0; + GLuint texture_ = 0; EGLStruct egl_; absl::optional GetRenderNode(); diff --git a/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc b/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc index 0ca75d00fc..a8879764c7 100644 --- a/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc +++ b/third_party/libwebrtc/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc @@ -149,6 +149,12 @@ class SharedScreenCastStreamPrivate { struct spa_video_info_raw spa_video_format_; void ProcessBuffer(pw_buffer* buffer); + bool ProcessMemFDBuffer(pw_buffer* buffer, + DesktopFrame& frame, + const DesktopVector& offset); + bool ProcessDMABuffer(pw_buffer* buffer, + DesktopFrame& frame, + const DesktopVector& offset); void ConvertRGBxToBGRx(uint8_t* frame, uint32_t size); // PipeWire callbacks @@ -268,9 +274,8 @@ void SharedScreenCastStreamPrivate::OnStreamParamChanged( std::vector params; const int buffer_types = has_modifier || (that->pw_server_version_ >= kDmaBufMinVersion) - ? (1 << SPA_DATA_DmaBuf) | (1 << SPA_DATA_MemFd) | - (1 << SPA_DATA_MemPtr) - : (1 << SPA_DATA_MemFd) | (1 << SPA_DATA_MemPtr); + ? (1 << SPA_DATA_DmaBuf) | (1 << SPA_DATA_MemFd) + : (1 << SPA_DATA_MemFd); params.push_back(reinterpret_cast(spa_pod_builder_add_object( &builder, SPA_TYPE_OBJECT_ParamBuffers, SPA_PARAM_Buffers, @@ -605,9 +610,6 @@ DesktopVector SharedScreenCastStreamPrivate::CaptureCursorPosition() { RTC_NO_SANITIZE("cfi-icall") void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) { spa_buffer* spa_buffer = buffer->buffer; - ScopedBuf map; - std::unique_ptr src_unique_ptr; - uint8_t* src = nullptr; // Try to update the mouse cursor first, because it can be the only // information carried by the buffer @@ -641,76 +643,6 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) { return; } - if (spa_buffer->datas[0].type == SPA_DATA_MemFd) { - map.initialize( - static_cast( - mmap(nullptr, - spa_buffer->datas[0].maxsize + spa_buffer->datas[0].mapoffset, - PROT_READ, MAP_PRIVATE, spa_buffer->datas[0].fd, 0)), - spa_buffer->datas[0].maxsize + spa_buffer->datas[0].mapoffset, - spa_buffer->datas[0].fd); - - if (!map) { - RTC_LOG(LS_ERROR) << "Failed to mmap the memory: " - << std::strerror(errno); - return; - } - - src = SPA_MEMBER(map.get(), spa_buffer->datas[0].mapoffset, uint8_t); - } else if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf) { - const uint n_planes = spa_buffer->n_datas; - - if (!n_planes) { - return; - } - - std::vector plane_datas; - for (uint32_t i = 0; i < n_planes; ++i) { - EglDmaBuf::PlaneData data = { - static_cast(spa_buffer->datas[i].fd), - static_cast(spa_buffer->datas[i].chunk->stride), - static_cast(spa_buffer->datas[i].chunk->offset)}; - plane_datas.push_back(data); - } - - // When importing DMA-BUFs, we use the stride (number of bytes from one row - // of pixels in the buffer) provided by PipeWire. The stride from PipeWire - // is given by the graphics driver and some drivers might add some - // additional padding for memory layout optimizations so not everytime the - // stride is equal to BYTES_PER_PIXEL x WIDTH. This is fine, because during - // the import we will use OpenGL and same graphics driver so it will be able - // to work with the stride it provided, but later on when we work with - // images we get from DMA-BUFs we will need to update the stride to be equal - // to BYTES_PER_PIXEL x WIDTH as that's the size of the DesktopFrame we - // allocate for each captured frame. - src_unique_ptr = egl_dmabuf_->ImageFromDmaBuf( - stream_size_, spa_video_format_.format, plane_datas, modifier_); - if (src_unique_ptr) { - src = src_unique_ptr.get(); - } else { - RTC_LOG(LS_ERROR) << "Dropping DMA-BUF modifier: " << modifier_ - << " and trying to renegotiate stream parameters"; - - if (pw_server_version_ >= kDropSingleModifierMinVersion) { - modifiers_.erase( - std::remove(modifiers_.begin(), modifiers_.end(), modifier_), - modifiers_.end()); - } else { - modifiers_.clear(); - } - - pw_loop_signal_event(pw_thread_loop_get_loop(pw_main_loop_), - renegotiate_); - return; - } - } else if (spa_buffer->datas[0].type == SPA_DATA_MemPtr) { - src = static_cast(spa_buffer->datas[0].data); - } - - if (!src) { - return; - } - // Use SPA_META_VideoCrop metadata to get the frame size. KDE and GNOME do // handle screen/window sharing differently. KDE/KWin doesn't use // SPA_META_VideoCrop metadata and when sharing a window, it always sets @@ -763,8 +695,8 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) { } // Get the position of the video crop within the stream. Just double-check - // that the position doesn't exceed the size of the stream itself. NOTE: - // Currently it looks there is no implementation using this. + // that the position doesn't exceed the size of the stream itself. + // NOTE: Currently it looks there is no implementation using this. uint32_t y_offset = videocrop_metadata_use && (videocrop_metadata->region.position.y + frame_size_.height() <= @@ -777,22 +709,7 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) { stream_size_.width()) ? videocrop_metadata->region.position.x : 0; - - const uint32_t stream_stride = kBytesPerPixel * stream_size_.width(); - uint32_t buffer_stride = spa_buffer->datas[0].chunk->stride; - uint32_t src_stride = buffer_stride; - - if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf && - buffer_stride > stream_stride) { - // When DMA-BUFs are used, sometimes spa_buffer->stride we get might - // contain additional padding, but after we import the buffer, the stride - // we used is no longer relevant and we should just calculate it based on - // the stream width. For more context see https://crbug.com/1333304. - src_stride = stream_stride; - } - - uint8_t* updated_src = - src + (src_stride * y_offset) + (kBytesPerPixel * x_offset); + DesktopVector offset = DesktopVector(x_offset, y_offset); webrtc::MutexLock lock(&queue_lock_); @@ -813,9 +730,17 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) { queue_.ReplaceCurrentFrame(SharedDesktopFrame::Wrap(std::move(frame))); } - queue_.current_frame()->CopyPixelsFrom( - updated_src, (src_stride - (kBytesPerPixel * x_offset)), - DesktopRect::MakeWH(frame_size_.width(), frame_size_.height())); + bool bufferProcessed = false; + if (spa_buffer->datas[0].type == SPA_DATA_MemFd) { + bufferProcessed = + ProcessMemFDBuffer(buffer, *queue_.current_frame(), offset); + } else if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf) { + bufferProcessed = ProcessDMABuffer(buffer, *queue_.current_frame(), offset); + } + + if (!bufferProcessed) { + return; + } if (spa_video_format_.format == SPA_VIDEO_FORMAT_RGBx || spa_video_format_.format == SPA_VIDEO_FORMAT_RGBA) { @@ -832,6 +757,87 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) { DesktopRect::MakeSize(queue_.current_frame()->size())); } +RTC_NO_SANITIZE("cfi-icall") +bool SharedScreenCastStreamPrivate::ProcessMemFDBuffer( + pw_buffer* buffer, + DesktopFrame& frame, + const DesktopVector& offset) { + spa_buffer* spa_buffer = buffer->buffer; + ScopedBuf map; + uint8_t* src = nullptr; + + map.initialize( + static_cast( + mmap(nullptr, + spa_buffer->datas[0].maxsize + spa_buffer->datas[0].mapoffset, + PROT_READ, MAP_PRIVATE, spa_buffer->datas[0].fd, 0)), + spa_buffer->datas[0].maxsize + spa_buffer->datas[0].mapoffset, + spa_buffer->datas[0].fd); + + if (!map) { + RTC_LOG(LS_ERROR) << "Failed to mmap the memory: " << std::strerror(errno); + return false; + } + + src = SPA_MEMBER(map.get(), spa_buffer->datas[0].mapoffset, uint8_t); + + uint32_t buffer_stride = spa_buffer->datas[0].chunk->stride; + uint32_t src_stride = buffer_stride; + + uint8_t* updated_src = + src + (src_stride * offset.y()) + (kBytesPerPixel * offset.x()); + + frame.CopyPixelsFrom( + updated_src, (src_stride - (kBytesPerPixel * offset.x())), + DesktopRect::MakeWH(frame.size().width(), frame.size().height())); + + return true; +} + +RTC_NO_SANITIZE("cfi-icall") +bool SharedScreenCastStreamPrivate::ProcessDMABuffer( + pw_buffer* buffer, + DesktopFrame& frame, + const DesktopVector& offset) { + spa_buffer* spa_buffer = buffer->buffer; + + const uint n_planes = spa_buffer->n_datas; + + if (!n_planes) { + return false; + } + + std::vector plane_datas; + for (uint32_t i = 0; i < n_planes; ++i) { + EglDmaBuf::PlaneData data = { + static_cast(spa_buffer->datas[i].fd), + static_cast(spa_buffer->datas[i].chunk->stride), + static_cast(spa_buffer->datas[i].chunk->offset)}; + plane_datas.push_back(data); + } + + const bool imported = egl_dmabuf_->ImageFromDmaBuf( + stream_size_, spa_video_format_.format, plane_datas, modifier_, offset, + frame.size(), frame.data()); + if (!imported) { + RTC_LOG(LS_ERROR) << "Dropping DMA-BUF modifier: " << modifier_ + << " and trying to renegotiate stream parameters"; + + if (pw_server_version_ >= kDropSingleModifierMinVersion) { + modifiers_.erase( + std::remove(modifiers_.begin(), modifiers_.end(), modifier_), + modifiers_.end()); + } else { + modifiers_.clear(); + } + + pw_loop_signal_event(pw_thread_loop_get_loop(pw_main_loop_), renegotiate_); + return false; + } + + return true; +} + void SharedScreenCastStreamPrivate::ConvertRGBxToBGRx(uint8_t* frame, uint32_t size) { for (uint32_t i = 0; i < size; i += 4) {