diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-03-21 19:23:16 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-03-21 19:23:16 +0000 |
commit | a6bafe0649b982e0dd433e727630becf2775f116 (patch) | |
tree | fce235dfe4ce4260278b9eca913106bbb72cf3a5 | |
parent | 829d193568fa069689fcb42535c4d12e50b1e621 (diff) | |
parent | ea15f40672340474611490d751a8af0913dbe699 (diff) | |
download | vulkan-cereal-a6bafe0649b982e0dd433e727630becf2775f116.tar.gz |
Snap for 8300341 from ea15f40672340474611490d751a8af0913dbe699 to emu-31-release
Change-Id: Iacaf501b954e078de72b2eb4d0e866ab1ad4a13e
-rw-r--r-- | base/CMakeLists.txt | 17 | ||||
-rw-r--r-- | base/ThreadPool.h | 89 | ||||
-rw-r--r-- | host-common/CMakeLists.txt | 18 | ||||
-rw-r--r-- | stream-servers/CMakeLists.txt | 1 | ||||
-rw-r--r-- | stream-servers/FrameBuffer.cpp | 18 | ||||
-rw-r--r-- | stream-servers/SyncThread.cpp | 496 | ||||
-rw-r--r-- | stream-servers/SyncThread.h | 108 | ||||
-rw-r--r-- | stream-servers/vulkan/VkAndroidNativeBuffer.cpp | 150 | ||||
-rw-r--r-- | stream-servers/vulkan/VkAndroidNativeBuffer.h | 64 | ||||
-rw-r--r-- | stream-servers/vulkan/VkDecoderGlobalState.cpp | 23 | ||||
-rw-r--r-- | stream-servers/vulkan/VkDecoderGlobalState.h | 8 | ||||
-rw-r--r-- | stream-servers/vulkan/VkQsriTimeline.h | 65 | ||||
-rw-r--r-- | stream-servers/vulkan/VkQsriTimeline_unittest.cpp | 42 |
13 files changed, 491 insertions, 608 deletions
diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index 978b7214..81a1bccb 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -44,22 +44,10 @@ else() ${gfxstream-base-posix-sources}) endif() -if (RECORDER_DELEGATE_LIB) - set(gfxstream-base-metrics-sources - MetricsRecorderDelegate.cpp) - set(gfxstream-base-metrics-link-libraries - recorder_delegate_lib) -else() - set(gfxstream-base-metrics-sources - MetricsNoOp.cpp) - set(gfxstream-base-metrics-link-libraries) -endif() - add_library( gfxstream-base ${gfxstream-base-common-sources} - ${gfxstream-platform-sources} - ${gfxstream-base-metrics-sources}) + ${gfxstream-platform-sources}) if (WIN32) set(gfxstream-base-platform-deps "") @@ -72,8 +60,7 @@ target_link_libraries( PRIVATE lz4 perfetto-tracing-only - ${gfxstream-base-platform-deps} - ${gfxstream-base-metrics-link-libraries}) + ${gfxstream-base-platform-deps}) target_include_directories( gfxstream-base PUBLIC ${GFXSTREAM_REPO_ROOT}) diff --git a/base/ThreadPool.h b/base/ThreadPool.h index 9e4aabca..c985c03a 100644 --- a/base/ThreadPool.h +++ b/base/ThreadPool.h @@ -14,17 +14,19 @@ #pragma once -#include "base/Compiler.h" -#include "base/Optional.h" -#include "base/System.h" -#include "base/WorkerThread.h" - #include <atomic> +#include <cstdint> #include <functional> #include <memory> +#include <type_traits> #include <utility> #include <vector> +#include "base/Compiler.h" +#include "base/Optional.h" +#include "base/System.h" +#include "base/WorkerThread.h" + // // ThreadPool<Item> - a simple collection of worker threads to process enqueued // items on multiple cores. @@ -58,27 +60,51 @@ namespace android { namespace base { +using ThreadPoolWorkerId = uint32_t; + template <class ItemT> class ThreadPool { DISALLOW_COPY_AND_ASSIGN(ThreadPool); public: using Item = ItemT; - using Worker = WorkerThread<Optional<Item>>; - using Processor = std::function<void(Item&&)>; - - ThreadPool(int threads, Processor&& processor) - : mProcessor(std::move(processor)) { + using WorkerId = ThreadPoolWorkerId; + using Processor = std::function<void(Item&&, WorkerId)>; + + private: + struct Command { + Item mItem; + WorkerId mWorkerId; + + Command(Item&& item, WorkerId workerId) : mItem(std::move(item)), mWorkerId(workerId) {} + DISALLOW_COPY_AND_ASSIGN(Command); + Command(Command&&) = default; + }; + using Worker = WorkerThread<Optional<Command>>; + + public: + // Fn is the type of the processor, it can either have 2 parameters: 1 for the Item, 1 for the + // WorkerId, or have only 1 Item parameter. + template <class Fn, typename = std::enable_if_t<std::is_invocable_v<Fn, Item, WorkerId> || + std::is_invocable_v<Fn, Item>>> + ThreadPool(int threads, Fn&& processor) : mProcessor() { + if constexpr (std::is_invocable_v<Fn, Item, WorkerId>) { + mProcessor = std::move(processor); + } else if constexpr (std::is_invocable_v<Fn, Item>) { + using namespace std::placeholders; + mProcessor = std::bind(std::move(processor), _1); + } if (threads < 1) { threads = android::base::getCpuCoreCount(); } mWorkers = std::vector<Optional<Worker>>(threads); for (auto& workerPtr : mWorkers) { - workerPtr.emplace([this](Optional<Item>&& item) { - if (!item) { + workerPtr.emplace([this](Optional<Command>&& commandOpt) { + if (!commandOpt) { return Worker::Result::Stop; } - mProcessor(std::move(item.value())); + Command command = std::move(commandOpt.value()); + mProcessor(std::move(command.mItem), command.mWorkerId); return Worker::Result::Continue; }); } @@ -123,47 +149,24 @@ public: for (;;) { int currentIndex = mNextWorkerIndex.fetch_add(1, std::memory_order_relaxed); - auto& workerPtr = mWorkers[currentIndex % mWorkers.size()]; - if (workerPtr) { - workerPtr->enqueue(std::move(item)); - break; - } - } - } - - void enqueueIndexed(const Item& item) { - for (;;) { - int currentIndex = - mNextWorkerIndex.fetch_add(1, std::memory_order_relaxed); int workerIndex = currentIndex % mWorkers.size(); auto& workerPtr = mWorkers[workerIndex]; if (workerPtr) { - Item itemCopy = item; - itemCopy.setIndex(workerIndex); - workerPtr->enqueue(std::move(itemCopy)); + Command command(std::forward<Item>(item), workerIndex); + workerPtr->enqueue(std::move(command)); break; } } } - // Runs the same Item for all workers. - void broadcast(const Item& item) { - for (auto workerOpt : mWorkers) { - if (!workerOpt) continue; - Item itemCopy = item; - workerOpt->enqueue(std::move(itemCopy)); - } - } - - // broadcast(), except the Item type must support a method to expose the - // worker id. - void broadcastIndexed(const Item& item) { + // The itemFactory will be called multiple times to generate one item for each worker thread. + template <class Fn, typename = std::enable_if_t<std::is_invocable_r_v<Item, Fn>>> + void broadcast(Fn&& itemFactory) { int i = 0; for (auto& workerOpt : mWorkers) { if (!workerOpt) continue; - Item itemCopy = item; - itemCopy.setIndex(i); - workerOpt->enqueue(std::move(itemCopy)); + Command command(std::move(itemFactory()), i); + workerOpt->enqueue(std::move(command)); ++i; } } diff --git a/host-common/CMakeLists.txt b/host-common/CMakeLists.txt index c3d62412..423d8fd0 100644 --- a/host-common/CMakeLists.txt +++ b/host-common/CMakeLists.txt @@ -10,16 +10,32 @@ else() opengl/NativeGpuInfo_linux.cpp) endif() +if (RECORDER_DELEGATE_LIB) + set(gfxstream-base-metrics-sources + ../base/MetricsRecorderDelegate.cpp) + set(gfxstream-base-metrics-link-libraries + recorder_delegate_lib) +else() + set(gfxstream-base-metrics-sources + ../base/MetricsNoOp.cpp) + set(gfxstream-base-metrics-link-libraries) +endif() + # TODO(gregschlom) move this to base add_library(logging-base STATIC logging.cpp - GfxstreamFatalError.cpp) + GfxstreamFatalError.cpp + ${gfxstream-base-metrics-sources}) target_include_directories( logging-base PRIVATE ${GFXSTREAM_REPO_ROOT} ${GFXSTREAM_REPO_ROOT}/include) +target_link_libraries( + logging-base + PRIVATE + ${gfxstream-base-metrics-link-libraries}) add_library( gfxstream-host-common diff --git a/stream-servers/CMakeLists.txt b/stream-servers/CMakeLists.txt index 0bf771cb..d7c8cfd1 100644 --- a/stream-servers/CMakeLists.txt +++ b/stream-servers/CMakeLists.txt @@ -220,6 +220,7 @@ add_executable( tests/DisplayVk_unittest.cpp tests/VirtioGpuTimelines_unittest.cpp vulkan/vk_util_unittest.cpp + vulkan/VkQsriTimeline_unittest.cpp ) target_link_libraries( Vulkan_unittests diff --git a/stream-servers/FrameBuffer.cpp b/stream-servers/FrameBuffer.cpp index f39b3e10..26d03e9a 100644 --- a/stream-servers/FrameBuffer.cpp +++ b/stream-servers/FrameBuffer.cpp @@ -998,15 +998,15 @@ WorkerProcessingResult FrameBuffer::postWorkerFunc(Post& post) { post.composeBuffer.size(), std::move(post.composeCallback)); } else { - auto composeCallback = - std::make_shared<Post::ComposeCallback>( - [composeCallback = std::move(post.composeCallback)] - (std::shared_future<void> waitForGpu) { - SyncThread::get()->triggerGeneral( - [composeCallback = std::move(composeCallback), waitForGpu]{ - (*composeCallback)(waitForGpu); - }); - }); + auto composeCallback = std::make_shared<Post::ComposeCallback>( + [composeCallback = + std::move(post.composeCallback)](std::shared_future<void> waitForGpu) { + SyncThread::get()->triggerGeneral( + [composeCallback = std::move(composeCallback), waitForGpu] { + (*composeCallback)(waitForGpu); + }, + "Wait for host composition"); + }); m_postWorker->compose( (ComposeDevice_v2*)post.composeBuffer.data(), post.composeBuffer.size(), std::move(composeCallback)); diff --git a/stream-servers/SyncThread.cpp b/stream-servers/SyncThread.cpp index 548fcfd8..fbad448f 100644 --- a/stream-servers/SyncThread.cpp +++ b/stream-servers/SyncThread.cpp @@ -98,13 +98,10 @@ static const uint64_t kDefaultTimeoutNsecs = 5ULL * 1000ULL * 1000ULL * 1000ULL; SyncThread::SyncThread(bool noGL) : android::base::Thread(android::base::ThreadFlags::MaskSignals, 512 * 1024), - mWorkerThreadPool(kNumWorkerThreads, [this](SyncThreadCmd&& cmd) { doSyncThreadCmd(&cmd); }), - mSignalPresentCompleteWorkerThreadPool( - kNumWorkerThreads, [this](SyncThreadCmd&& cmd) { doSyncThreadCmd(&cmd); }), + mWorkerThreadPool(kNumWorkerThreads, doSyncThreadCmd), mNoGL(noGL) { this->start(); mWorkerThreadPool.start(); - mSignalPresentCompleteWorkerThreadPool.start(); if (!noGL) { initSyncEGLContext(); } @@ -116,106 +113,97 @@ SyncThread::~SyncThread() { void SyncThread::triggerWait(FenceSync* fenceSync, uint64_t timeline) { - DPRINT("fenceSyncInfo=0x%llx timeline=0x%lx ...", - fenceSync, timeline); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_WAIT; - to_send.fenceSync = fenceSync; - to_send.timeline = timeline; - DPRINT("opcode=%u", to_send.opCode); - sendAsync(to_send); - DPRINT("exit"); + std::stringstream ss; + ss << "triggerWait fenceSyncInfo=0x" << std::hex << reinterpret_cast<uintptr_t>(fenceSync) + << " timeline=0x" << std::hex << timeline; + sendAsync( + [fenceSync, timeline, this](WorkerId) { + doSyncWait(fenceSync, [timeline] { + DPRINT("wait done (with fence), use goldfish sync timeline inc"); + emugl::emugl_sync_timeline_inc(timeline, kTimelineInterval); + }); + }, + ss.str()); } void SyncThread::triggerWaitVk(VkFence vkFence, uint64_t timeline) { - DPRINT("fenceSyncInfo=0x%llx timeline=0x%lx ...", fenceSync, timeline); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_WAIT_VK; - to_send.vkFence = vkFence; - to_send.timeline = timeline; - DPRINT("opcode=%u", to_send.opCode); - sendAsync(to_send); - DPRINT("exit"); + std::stringstream ss; + ss << "triggerWaitVk vkFence=0x" << std::hex << reinterpret_cast<uintptr_t>(vkFence) + << " timeline=0x" << std::hex << timeline; + sendAsync( + [vkFence, timeline](WorkerId) { + doSyncWaitVk(vkFence, [timeline] { + DPRINT("vk wait done, use goldfish sync timeline inc"); + emugl::emugl_sync_timeline_inc(timeline, kTimelineInterval); + }); + }, + ss.str()); } void SyncThread::triggerBlockedWaitNoTimeline(FenceSync* fenceSync) { - DPRINT("fenceSyncInfo=0x%llx ...", fenceSync); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE; - to_send.fenceSync = fenceSync; - DPRINT("opcode=%u", to_send.opCode); - sendAndWaitForResult(to_send); - DPRINT("exit"); + std::stringstream ss; + ss << "triggerBlockedWaitNoTimeline fenceSyncInfo=0x" << std::hex + << reinterpret_cast<uintptr_t>(fenceSync); + sendAndWaitForResult( + [fenceSync, this](WorkerId) { + doSyncWait(fenceSync, std::function<void()>()); + return 0; + }, + ss.str()); } void SyncThread::triggerWaitWithCompletionCallback(FenceSync* fenceSync, FenceCompletionCallback cb) { - DPRINT("fenceSyncInfo=0x%llx ...", fenceSync); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_WAIT; - to_send.fenceSync = fenceSync; - to_send.useFenceCompletionCallback = true; - to_send.fenceCompletionCallback = cb; - DPRINT("opcode=%u", to_send.opCode); - sendAsync(to_send); - DPRINT("exit"); + std::stringstream ss; + ss << "triggerWaitWithCompletionCallback fenceSyncInfo=0x" << std::hex + << reinterpret_cast<uintptr_t>(fenceSync); + sendAsync( + [fenceSync, cb = std::move(cb), this](WorkerId) { doSyncWait(fenceSync, std::move(cb)); }, + ss.str()); } void SyncThread::triggerWaitVkWithCompletionCallback(VkFence vkFence, FenceCompletionCallback cb) { - DPRINT("fenceSyncInfo=0x%llx ...", fenceSync); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_WAIT_VK; - to_send.vkFence = vkFence; - to_send.useFenceCompletionCallback = true; - to_send.fenceCompletionCallback = cb; - DPRINT("opcode=%u", to_send.opCode); - sendAsync(to_send); - DPRINT("exit"); + std::stringstream ss; + ss << "triggerWaitVkWithCompletionCallback vkFence=0x" << std::hex + << reinterpret_cast<uintptr_t>(vkFence); + sendAsync([vkFence, cb = std::move(cb)](WorkerId) { doSyncWaitVk(vkFence, std::move(cb)); }, + ss.str()); } void SyncThread::triggerWaitVkQsriWithCompletionCallback(VkImage vkImage, FenceCompletionCallback cb) { - DPRINT("fenceSyncInfo=0x%llx ...", fenceSync); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_WAIT_VK_QSRI; - to_send.vkImage = vkImage; - to_send.useFenceCompletionCallback = true; - to_send.fenceCompletionCallback = cb; - DPRINT("opcode=%u", to_send.opCode); - sendAsync(to_send); - DPRINT("exit"); + std::stringstream ss; + ss << "triggerWaitVkQsriWithCompletionCallback vkImage=0x" + << reinterpret_cast<uintptr_t>(vkImage); + sendAsync( + [vkImage, cb = std::move(cb)](WorkerId) { + auto decoder = goldfish_vk::VkDecoderGlobalState::get(); + decoder->registerQsriCallback(vkImage, std::move(cb)); + }, + ss.str()); } -void SyncThread::triggerWaitVkQsriBlockedNoTimeline(VkImage vkImage) { - DPRINT("fenceSyncInfo=0x%llx ...", fenceSync); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_WAIT_VK_QSRI; - to_send.vkImage = vkImage; - DPRINT("opcode=%u", to_send.opCode); - sendAndWaitForResult(to_send); - DPRINT("exit"); -} - -void SyncThread::triggerGeneral(FenceCompletionCallback cb) { - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_GENERAL; - to_send.useFenceCompletionCallback = true; - to_send.fenceCompletionCallback = cb; - sendAsync(to_send); -} - -void SyncThread::triggerSignalVkPresentComplete(FenceCompletionCallback cb) { - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_GENERAL; - to_send.useFenceCompletionCallback = true; - to_send.fenceCompletionCallback = cb; - mSignalPresentCompleteWorkerThreadPool.enqueue(std::move(to_send)); +void SyncThread::triggerGeneral(FenceCompletionCallback cb, std::string description) { + std::stringstream ss; + ss << "triggerGeneral: " << description; + sendAsync(std::bind(std::move(cb)), ss.str()); } void SyncThread::cleanup() { - DPRINT("enter"); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_EXIT; - sendAndWaitForResult(to_send); + sendAndWaitForResult( + [this](WorkerId workerId) { + if (!mNoGL) { + const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get(); + + egl->eglMakeCurrent(mDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + + egl->eglDestroyContext(mDisplay, mContext[workerId]); + egl->eglDestroySurface(mDisplay, mSurface[workerId]); + mContext[workerId] = EGL_NO_CONTEXT; + mSurface[workerId] = EGL_NO_SURFACE; + } + return 0; + }, + "cleanup"); DPRINT("signal"); mLock.lock(); mExiting = true; @@ -230,15 +218,6 @@ void SyncThread::cleanup() { // Private methods below//////////////////////////////////////////////////////// -void SyncThread::initSyncEGLContext() { - DPRINT("enter"); - SyncThreadCmd to_send; - to_send.opCode = SYNC_THREAD_EGL_INIT; - mWorkerThreadPool.broadcastIndexed(to_send); - mWorkerThreadPool.waitAllItems(); - DPRINT("exit"); -} - intptr_t SyncThread::main() { DPRINT("in sync thread"); mLock.lock(); @@ -246,96 +225,96 @@ intptr_t SyncThread::main() { mWorkerThreadPool.done(); mWorkerThreadPool.join(); - mSignalPresentCompleteWorkerThreadPool.done(); - mSignalPresentCompleteWorkerThreadPool.join(); DPRINT("exited sync thread"); return 0; } -int SyncThread::sendAndWaitForResult(SyncThreadCmd& cmd) { - DPRINT("send with opcode=%d", cmd.opCode); - android::base::Lock lock; - android::base::ConditionVariable cond; - android::base::Optional<int> result = android::base::kNullopt; - cmd.lock = &lock; - cmd.cond = &cond; - cmd.result = &result; - - lock.lock(); - mWorkerThreadPool.enqueue(std::move(cmd)); - cond.wait(&lock, [&result] { return result.hasValue(); }); - - DPRINT("result=%d", *result); - int final_result = *result; - - // Clear these to prevent dangling pointers after we return. - cmd.lock = nullptr; - cmd.cond = nullptr; - cmd.result = nullptr; - return final_result; -} - -void SyncThread::sendAsync(SyncThreadCmd& cmd) { - DPRINT("send with opcode=%u fenceSyncInfo=0x%llx", - cmd.opCode, cmd.fenceSync); - mWorkerThreadPool.enqueue(std::move(cmd)); -} - -void SyncThread::doSyncEGLContextInit(SyncThreadCmd* cmd) { - DPRINT("for worker id: %d", cmd->workerId); - // We shouldn't initialize EGL context, when SyncThread is initialized - // without GL enabled. - SYNC_THREAD_CHECK(!mNoGL); - - const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get(); - - mDisplay = egl->eglGetDisplay(EGL_DEFAULT_DISPLAY); - int eglMaj, eglMin; - egl->eglInitialize(mDisplay, &eglMaj , &eglMin); - - const EGLint configAttribs[] = { - EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, - EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, - EGL_RED_SIZE, 8, - EGL_GREEN_SIZE, 8, - EGL_BLUE_SIZE, 8, - EGL_NONE, +int SyncThread::sendAndWaitForResult(std::function<int(WorkerId)> job, std::string description) { + DPRINT("sendAndWaitForResult task(%s)", description.c_str()); + std::packaged_task<int(WorkerId)> task(std::move(job)); + std::future<int> resFuture = task.get_future(); + Command command = { + .mTask = std::move(task), + .mDescription = std::move(description), }; - EGLint nConfigs; - EGLConfig config; - - egl->eglChooseConfig(mDisplay, configAttribs, &config, 1, &nConfigs); - - const EGLint pbufferAttribs[] = { - EGL_WIDTH, 1, - EGL_HEIGHT, 1, - EGL_NONE, - }; - - mSurface[cmd->workerId] = - egl->eglCreatePbufferSurface(mDisplay, config, pbufferAttribs); + mWorkerThreadPool.enqueue(std::move(command)); + auto res = resFuture.get(); + DPRINT("exit"); + return res; +} - const EGLint contextAttribs[] = { EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE }; - mContext[cmd->workerId] = egl->eglCreateContext(mDisplay, config, EGL_NO_CONTEXT, contextAttribs); +void SyncThread::sendAsync(std::function<void(WorkerId)> job, std::string description) { + DPRINT("send task(%s)", description.c_str()); + mWorkerThreadPool.enqueue(Command{ + .mTask = + std::packaged_task<int(WorkerId)>([job = std::move(job)](WorkerId workerId) mutable { + job(workerId); + return 0; + }), + .mDescription = std::move(description), + }); + DPRINT("exit"); +} - egl->eglMakeCurrent(mDisplay, mSurface[cmd->workerId], mSurface[cmd->workerId], mContext[cmd->workerId]); +void SyncThread::initSyncEGLContext() { + mWorkerThreadPool.broadcast([this] { + return Command{ + .mTask = std::packaged_task<int(WorkerId)>([this](WorkerId workerId) { + DPRINT("for worker id: %d", workerId); + // We shouldn't initialize EGL context, when SyncThread is initialized + // without GL enabled. + SYNC_THREAD_CHECK(!mNoGL); + + const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get(); + + mDisplay = egl->eglGetDisplay(EGL_DEFAULT_DISPLAY); + int eglMaj, eglMin; + egl->eglInitialize(mDisplay, &eglMaj, &eglMin); + + const EGLint configAttribs[] = { + EGL_SURFACE_TYPE, + EGL_PBUFFER_BIT, + EGL_RENDERABLE_TYPE, + EGL_OPENGL_ES2_BIT, + EGL_RED_SIZE, + 8, + EGL_GREEN_SIZE, + 8, + EGL_BLUE_SIZE, + 8, + EGL_NONE, + }; + + EGLint nConfigs; + EGLConfig config; + + egl->eglChooseConfig(mDisplay, configAttribs, &config, 1, &nConfigs); + + const EGLint pbufferAttribs[] = { + EGL_WIDTH, 1, EGL_HEIGHT, 1, EGL_NONE, + }; + + mSurface[workerId] = egl->eglCreatePbufferSurface(mDisplay, config, pbufferAttribs); + + const EGLint contextAttribs[] = {EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE}; + mContext[workerId] = + egl->eglCreateContext(mDisplay, config, EGL_NO_CONTEXT, contextAttribs); + + egl->eglMakeCurrent(mDisplay, mSurface[workerId], mSurface[workerId], + mContext[workerId]); + return 0; + }), + .mDescription = "init sync EGL context", + }; + }); } -void SyncThread::doSyncWait(SyncThreadCmd* cmd) { +void SyncThread::doSyncWait(FenceSync* fenceSync, std::function<void()> onComplete) { DPRINT("enter"); - FenceSync* fenceSync = - FenceSync::getFromHandle((uint64_t)(uintptr_t)cmd->fenceSync); - - if (!fenceSync) { - if (cmd->useFenceCompletionCallback) { - DPRINT("wait done (null fence), use completion callback"); - cmd->fenceCompletionCallback(); - } else { - DPRINT("wait done (null fence), use sync timeline inc"); - emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval); - } + if (!FenceSync::getFromHandle((uint64_t)(uintptr_t)fenceSync) && onComplete) { + onComplete(); return; } // We shouldn't use FenceSync to wait, when SyncThread is initialized @@ -344,16 +323,18 @@ void SyncThread::doSyncWait(SyncThreadCmd* cmd) { EGLint wait_result = 0x0; - DPRINT("wait on sync obj: %p", cmd->fenceSync); - wait_result = cmd->fenceSync->wait(kDefaultTimeoutNsecs); + DPRINT("wait on sync obj: %p", fenceSync); + wait_result = fenceSync->wait(kDefaultTimeoutNsecs); DPRINT("done waiting, with wait result=0x%x. " "increment timeline (and signal fence)", wait_result); if (wait_result != EGL_CONDITION_SATISFIED_KHR) { - DPRINT("error: eglClientWaitSync abnormal exit 0x%x. sync handle 0x%llx\n", - wait_result, (unsigned long long)cmd->fenceSync); + EGLint error = s_egl.eglGetError(); + DPRINT("error: eglClientWaitSync abnormal exit 0x%x. sync handle 0x%llx. egl error = %#x\n", + wait_result, (unsigned long long)fenceSync, error); + (void)error; } DPRINT("issue timeline increment"); @@ -382,12 +363,8 @@ void SyncThread::doSyncWait(SyncThreadCmd* cmd) { // incrementing the timeline means that the app's rendering freezes. // So, despite the faulty GPU driver, not incrementing is too heavyweight a response. - if (cmd->useFenceCompletionCallback) { - DPRINT("wait done (with fence), use completion callback"); - cmd->fenceCompletionCallback(); - } else { - DPRINT("wait done (with fence), use goldfish sync timeline inc"); - emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval); + if (onComplete) { + onComplete(); } FenceSync::incrementTimelineAndDeleteOldFences(); @@ -396,15 +373,15 @@ void SyncThread::doSyncWait(SyncThreadCmd* cmd) { DPRINT("exit"); } -int SyncThread::doSyncWaitVk(SyncThreadCmd* cmd) { +int SyncThread::doSyncWaitVk(VkFence vkFence, std::function<void()> onComplete) { DPRINT("enter"); auto decoder = goldfish_vk::VkDecoderGlobalState::get(); - auto result = decoder->waitForFence(cmd->vkFence, kDefaultTimeoutNsecs); + auto result = decoder->waitForFence(vkFence, kDefaultTimeoutNsecs); if (result == VK_TIMEOUT) { - DPRINT("SYNC_WAIT_VK timeout: vkFence=%p", cmd->vkFence); + DPRINT("SYNC_WAIT_VK timeout: vkFence=%p", vkFence); } else if (result != VK_SUCCESS) { - DPRINT("SYNC_WAIT_VK error: %d vkFence=%p", result, cmd->vkFence); + DPRINT("SYNC_WAIT_VK error: %d vkFence=%p", result, vkFence); } DPRINT("issue timeline increment"); @@ -412,12 +389,8 @@ int SyncThread::doSyncWaitVk(SyncThreadCmd* cmd) { // We always unconditionally increment timeline at this point, even // if the call to vkWaitForFences returned abnormally. // See comments in |doSyncWait| about the rationale. - if (cmd->useFenceCompletionCallback) { - DPRINT("vk wait done, use completion callback"); - cmd->fenceCompletionCallback(); - } else { - DPRINT("vk wait done, use goldfish sync timeline inc"); - emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval); + if (onComplete) { + onComplete(); } DPRINT("done timeline increment"); @@ -426,148 +399,9 @@ int SyncThread::doSyncWaitVk(SyncThreadCmd* cmd) { return result; } -int SyncThread::doSyncWaitVkQsri(SyncThreadCmd* cmd) { - DPRINT("enter"); - - auto decoder = goldfish_vk::VkDecoderGlobalState::get(); - DPRINT("doSyncWaitVkQsri for image %p", cmd->vkImage); - auto result = decoder->waitQsri(cmd->vkImage, kDefaultTimeoutNsecs); - DPRINT("doSyncWaitVkQsri for image %p (done, do signal/callback)", cmd->vkImage); - if (result == VK_TIMEOUT) { - fprintf(stderr, "SyncThread::%s: SYNC_WAIT_VK_QSRI timeout: vkImage=%p\n", - __func__, cmd->vkImage); - } else if (result != VK_SUCCESS) { - fprintf(stderr, "SyncThread::%s: SYNC_WAIT_VK_QSRI error: %d vkImage=%p\n", - __func__, result, cmd->vkImage); - } - - DPRINT("issue timeline increment"); - - // We always unconditionally increment timeline at this point, even - // if the call to vkWaitForFences returned abnormally. - // See comments in |doSyncWait| about the rationale. - if (cmd->useFenceCompletionCallback) { - DPRINT("wait done, use completion callback"); - cmd->fenceCompletionCallback(); - } else { - DPRINT("wait done, use goldfish sync timeline inc"); - emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval); - } - - DPRINT("done timeline increment"); - - DPRINT("exit"); - return result; -} - -int SyncThread::doSyncGeneral(SyncThreadCmd* cmd) { - DPRINT("enter"); - if (cmd->useFenceCompletionCallback) { - DPRINT("wait done, use completion callback"); - cmd->fenceCompletionCallback(); - } else { - DPRINT("warning, completion callback not provided in general op!"); - } - - return 0; -} - -void SyncThread::doSyncBlockedWaitNoTimeline(SyncThreadCmd* cmd) { - DPRINT("enter"); - - FenceSync* fenceSync = - FenceSync::getFromHandle((uint64_t)(uintptr_t)cmd->fenceSync); - - if (!fenceSync) { - return; - } - - // We shouldn't use FenceSync to wait, when SyncThread is initialized - // without GL enabled, because FenceSync uses EGL/GLES. - SYNC_THREAD_CHECK(!mNoGL); - - EGLint wait_result = 0x0; - - DPRINT("wait on sync obj: %p", cmd->fenceSync); - wait_result = cmd->fenceSync->wait(kDefaultTimeoutNsecs); - - DPRINT("done waiting, with wait result=0x%x. " - "increment timeline (and signal fence)", - wait_result); - - if (wait_result != EGL_CONDITION_SATISFIED_KHR) { - EGLint error = s_egl.eglGetError(); - fprintf(stderr, "error: eglClientWaitSync abnormal exit 0x%x %p %#x\n", - wait_result, cmd->fenceSync, error); - } - - FenceSync::incrementTimelineAndDeleteOldFences(); -} - -void SyncThread::doExit(SyncThreadCmd* cmd) { - if (!mNoGL) { - const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get(); - - egl->eglMakeCurrent(mDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, - EGL_NO_CONTEXT); - - egl->eglDestroyContext(mDisplay, mContext[cmd->workerId]); - egl->eglDestroySurface(mDisplay, mSurface[cmd->workerId]); - mContext[cmd->workerId] = EGL_NO_CONTEXT; - mSurface[cmd->workerId] = EGL_NO_SURFACE; - } -} - -int SyncThread::doSyncThreadCmd(SyncThreadCmd* cmd) { -#if DEBUG - thread_local static auto threadId = android::base::getCurrentThreadId(); - thread_local static size_t numCommands = 0U; - DPRINT("threadId = %lu numCommands = %lu cmd = %p", threadId, ++numCommands, - cmd); -#endif // DEBUG - - int result = 0; - switch (cmd->opCode) { - case SYNC_THREAD_EGL_INIT: - DPRINT("exec SYNC_THREAD_EGL_INIT"); - doSyncEGLContextInit(cmd); - break; - case SYNC_THREAD_WAIT: - DPRINT("exec SYNC_THREAD_WAIT"); - doSyncWait(cmd); - break; - case SYNC_THREAD_WAIT_VK: - DPRINT("exec SYNC_THREAD_WAIT_VK"); - result = doSyncWaitVk(cmd); - break; - case SYNC_THREAD_WAIT_VK_QSRI: - DPRINT("exec SYNC_THREAD_WAIT_VK_QSRI"); - result = doSyncWaitVkQsri(cmd); - break; - case SYNC_THREAD_GENERAL: - DPRINT("exec SYNC_THREAD_GENERAL"); - result = doSyncGeneral(cmd); - break; - case SYNC_THREAD_EXIT: - DPRINT("exec SYNC_THREAD_EXIT"); - doExit(cmd); - break; - case SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE: - DPRINT("exec SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE"); - doSyncBlockedWaitNoTimeline(cmd); - break; - } - - bool need_reply = cmd->lock != nullptr && cmd->cond != nullptr; - if (need_reply) { - cmd->lock->lock(); - *cmd->result = android::base::makeOptional(result); - cmd->cond->signalAndUnlock(cmd->lock); - } - return result; -} - /* static */ +void SyncThread::doSyncThreadCmd(Command&& command, WorkerId workerId) { command.mTask(workerId); } + SyncThread* SyncThread::get() { auto res = sGlobalSyncThread()->syncThreadPtr(); SYNC_THREAD_CHECK(res); diff --git a/stream-servers/SyncThread.h b/stream-servers/SyncThread.h index 24f6f00e..0adbb0f3 100644 --- a/stream-servers/SyncThread.h +++ b/stream-servers/SyncThread.h @@ -16,70 +16,29 @@ #pragma once -#include "FenceSync.h" - #include <EGL/egl.h> #include <GLES2/gl2.h> #include <GLES2/gl2ext.h> -#include "base/Optional.h" +#include <functional> +#include <future> +#include <string> +#include <type_traits> + +#include "FenceSync.h" #include "base/ConditionVariable.h" #include "base/Lock.h" +#include "base/MessageChannel.h" +#include "base/Optional.h" #include "base/Thread.h" #include "base/ThreadPool.h" -#include "base/MessageChannel.h" -#include "vulkan/VkDecoderGlobalState.h" #include "virtio_gpu_ops.h" +#include "vulkan/VkDecoderGlobalState.h" // SyncThread/////////////////////////////////////////////////////////////////// // The purpose of SyncThread is to track sync device timelines and give out + // signal FD's that correspond to the completion of host-side GL fence commands. -// We communicate with the sync thread in 3 ways: -enum SyncThreadOpCode { - // Nonblocking command to initialize sync thread's EGL contexts for sync - // operations - SYNC_THREAD_EGL_INIT = 0, - // Nonblocking command to wait on a given FenceSync object - // and timeline handle. - // A fence FD object in the guest is signaled. - SYNC_THREAD_WAIT = 1, - // Blocking command to clean up and exit the sync thread. - SYNC_THREAD_EXIT = 2, - // Blocking command to wait on a given FenceSync object. - // No timeline handling is done. - SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE = 3, - // Nonblocking command to wait on a given VkFence - // and timeline handle. - // A fence FD object / Zircon eventpair in the guest is signaled. - SYNC_THREAD_WAIT_VK = 4, - // Command to wait on the presentation the given VkImage. - SYNC_THREAD_WAIT_VK_QSRI = 5, - // Command that consists only of a callback. - SYNC_THREAD_GENERAL = 6, -}; - -struct SyncThreadCmd { - // For use with initialization in multiple thread pools. - int workerId = 0; - // For use with ThreadPool::broadcastIndexed - void setIndex(int id) { workerId = id; } - - SyncThreadOpCode opCode = SYNC_THREAD_EGL_INIT; - union { - FenceSync* fenceSync = nullptr; - VkFence vkFence; - VkImage vkImage; - }; - uint64_t timeline = 0; - - android::base::Lock* lock = nullptr; - android::base::ConditionVariable* cond = nullptr; - android::base::Optional<int>* result = nullptr; - - bool useFenceCompletionCallback = false; - FenceCompletionCallback fenceCompletionCallback;; -}; struct RenderThreadInfo; class SyncThread : public android::base::Thread { @@ -117,12 +76,7 @@ public: void triggerWaitWithCompletionCallback(FenceSync* fenceSync, FenceCompletionCallback); void triggerWaitVkWithCompletionCallback(VkFence fenceHandle, FenceCompletionCallback); void triggerWaitVkQsriWithCompletionCallback(VkImage image, FenceCompletionCallback); - void triggerWaitVkQsriBlockedNoTimeline(VkImage image); - // Similar to triggerGeneral, but will dispatch the task to the dedicated - // signalPresentCompleteWorkerThreadPool. - void triggerSignalVkPresentComplete(FenceCompletionCallback); - - void triggerGeneral(FenceCompletionCallback); + void triggerGeneral(FenceCompletionCallback, std::string description); // |cleanup|: for use with destructors and other cleanup functions. // it destroys the sync context and exits the sync thread. @@ -141,6 +95,13 @@ public: static void destroy(); private: + using WorkerId = android::base::ThreadPoolWorkerId; + struct Command { + std::packaged_task<int(WorkerId)> mTask; + std::string mDescription; + }; + using ThreadPool = android::base::ThreadPool<Command>; + // |initSyncContext| creates an EGL context expressly for calling // eglClientWaitSyncKHR in the processing caused by |triggerWait|. // This is used by the constructor only. It is non-blocking. @@ -151,25 +112,19 @@ public: // It keeps the workers runner until |mExiting| is set. virtual intptr_t main() override final; - // These two functions are used to communicate with the sync thread - // from another thread: - // - |sendAndWaitForResult| issues |cmd| to the sync thread, - // and blocks until it receives the result of the command. - // - |sendAsync| issues |cmd| to the sync thread and does not - // wait for the result, returning immediately after. - int sendAndWaitForResult(SyncThreadCmd& cmd); - void sendAsync(SyncThreadCmd& cmd); - - // |doSyncThreadCmd| and related functions below - // execute the actual commands. These run on the sync thread. - int doSyncThreadCmd(SyncThreadCmd* cmd); - void doSyncEGLContextInit(SyncThreadCmd* cmd); - void doSyncWait(SyncThreadCmd* cmd); - int doSyncWaitVk(SyncThreadCmd* cmd); - int doSyncWaitVkQsri(SyncThreadCmd* cmd); - int doSyncGeneral(SyncThreadCmd* cmd); - void doSyncBlockedWaitNoTimeline(SyncThreadCmd* cmd); - void doExit(SyncThreadCmd* cmd); + // These two functions are used to communicate with the sync thread from another thread: + // - |sendAndWaitForResult| issues |job| to the sync thread, and blocks until it receives the + // result of the job. + // - |sendAsync| issues |job| to the sync thread and does not wait for the result, returning + // immediately after. + int sendAndWaitForResult(std::function<int(WorkerId)> job, std::string description); + void sendAsync(std::function<void(WorkerId)> job, std::string description); + + // |doSyncThreadCmd| execute the actual task. These run on the sync thread. + static void doSyncThreadCmd(Command&& command, ThreadPool::WorkerId); + + void doSyncWait(FenceSync* fenceSync, std::function<void()> onComplete); + static int doSyncWaitVk(VkFence, std::function<void()> onComplete); // EGL objects / object handles specific to // a sync thread. @@ -182,8 +137,7 @@ public: bool mExiting = false; android::base::Lock mLock; android::base::ConditionVariable mCv; - android::base::ThreadPool<SyncThreadCmd> mWorkerThreadPool; - android::base::ThreadPool<SyncThreadCmd> mSignalPresentCompleteWorkerThreadPool; + ThreadPool mWorkerThreadPool; bool mNoGL; }; diff --git a/stream-servers/vulkan/VkAndroidNativeBuffer.cpp b/stream-servers/vulkan/VkAndroidNativeBuffer.cpp index 0512caef..96b33aae 100644 --- a/stream-servers/vulkan/VkAndroidNativeBuffer.cpp +++ b/stream-servers/vulkan/VkAndroidNativeBuffer.cpp @@ -12,17 +12,19 @@ // limitations under the License. #include "VkAndroidNativeBuffer.h" -#include "cereal/common/goldfish_vk_private_defs.h" -#include "cereal/common/goldfish_vk_extension_structs.h" +#include <string.h> + +#include <future> -#include "host-common/GfxstreamFatalError.h" -#include "stream-servers/FrameBuffer.h" #include "GrallocDefs.h" +#include "SyncThread.h" #include "VkCommonOperations.h" #include "VulkanDispatch.h" -#include "SyncThread.h" - -#include <string.h> +#include "cereal/common/goldfish_vk_extension_structs.h" +#include "cereal/common/goldfish_vk_private_defs.h" +#include "host-common/GfxstreamFatalError.h" +#include "stream-servers/FrameBuffer.h" +#include "vulkan/vk_enum_string_helper.h" #define VK_ANB_ERR(fmt,...) fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, ##__VA_ARGS__); @@ -43,40 +45,58 @@ using emugl::FatalError; namespace goldfish_vk { -VkFence AndroidNativeBufferInfo::QsriWaitInfo::getFenceFromPoolLocked() { - VK_ANB_DEBUG("enter"); - - if (!vk) return VK_NULL_HANDLE; +AndroidNativeBufferInfo::QsriWaitFencePool::QsriWaitFencePool(VulkanDispatch* vk, VkDevice device) + : mVk(vk), mDevice(device) {} - if (fencePool.empty()) { - VkFence fence; +VkFence AndroidNativeBufferInfo::QsriWaitFencePool::getFenceFromPool() { + VK_ANB_DEBUG("enter"); + AutoLock lock(mLock); + VkFence fence = VK_NULL_HANDLE; + if (mAvailableFences.empty()) { VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, 0, 0, }; - vk->vkCreateFence(device, &fenceCreateInfo, nullptr, &fence); + mVk->vkCreateFence(mDevice, &fenceCreateInfo, nullptr, &fence); VK_ANB_DEBUG("no fences in pool, created %p", fence); - return fence; } else { - VkFence res = fencePool.back(); - fencePool.pop_back(); - vk->vkResetFences(device, 1, &res); - VK_ANB_DEBUG("existing fence in pool: %p. also reset the fence", res); - return res; + fence = mAvailableFences.back(); + mAvailableFences.pop_back(); + VkResult res = mVk->vkResetFences(mDevice, 1, &fence); + if (res != VK_SUCCESS) { + GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER)) + << "Fail to reset Qsri VkFence: " << res << "(" << string_VkResult(res) << ")."; + } + VK_ANB_DEBUG("existing fence in pool: %p. also reset the fence", fence); } + mUsedFences.emplace(fence); + VK_ANB_DEBUG("exit"); + return fence; } -AndroidNativeBufferInfo::QsriWaitInfo::~QsriWaitInfo() { +AndroidNativeBufferInfo::QsriWaitFencePool::~QsriWaitFencePool() { VK_ANB_DEBUG("enter"); - if (!vk) return; - if (!device) return; // Nothing in the fence pool is unsignaled - for (auto fence : fencePool) { + if (!mUsedFences.empty()) { + VK_ANB_ERR("%zu VkFences are still being used when destroying the Qsri fence pool.", + mUsedFences.size()); + } + for (auto fence : mAvailableFences) { VK_ANB_DEBUG("destroy fence %p", fence); - vk->vkDestroyFence(device, fence, nullptr); + mVk->vkDestroyFence(mDevice, fence, nullptr); } VK_ANB_DEBUG("exit"); } +void AndroidNativeBufferInfo::QsriWaitFencePool::returnFence(VkFence fence) { + AutoLock lock(mLock); + if (!mUsedFences.erase(fence)) { + GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER)) + << "Return an unmanaged Qsri VkFence back to the pool."; + return; + } + mAvailableFences.push_back(fence); +} + bool parseAndroidNativeBufferInfo( const VkImageCreateInfo* pCreateInfo, AndroidNativeBufferInfo* info_out) { @@ -309,6 +329,9 @@ VkResult prepareAndroidNativeBufferImage( } } + out->qsriWaitFencePool = + std::make_unique<AndroidNativeBufferInfo::QsriWaitFencePool>(out->vk, out->device); + out->qsriTimeline = std::make_unique<VkQsriTimeline>(); return VK_SUCCESS; } @@ -345,9 +368,7 @@ void teardownAndroidNativeBufferImage( anbInfo->mappedStagingPtr = nullptr; anbInfo->stagingMemory = VK_NULL_HANDLE; - AutoLock lock(anbInfo->qsriWaitInfo.lock); - anbInfo->qsriWaitInfo.presentCount = 0; - anbInfo->qsriWaitInfo.requestedPresentCount = 0; + anbInfo->qsriWaitFencePool = nullptr; } void getGralloc0Usage(VkFormat format, VkImageUsageFlags imageUsage, @@ -599,12 +620,6 @@ VkResult syncImageToColorBuffer( std::shared_ptr<AndroidNativeBufferInfo> anbInfo) { auto anbInfoPtr = anbInfo.get(); - { - AutoLock lock(anbInfo->qsriWaitInfo.lock); - VK_ANB_DEBUG_OBJ(anbInfoPtr, "ensure dispatch %p device %p", vk, anbInfo->device); - anbInfo->qsriWaitInfo.ensureDispatchAndDevice(vk, anbInfo->device); - } - auto fb = FrameBuffer::getFB(); fb->lock(); @@ -752,48 +767,44 @@ VkResult syncImageToColorBuffer( }; // TODO(kaiyili): initiate ownership transfer to DisplayVk here. - VkFence qsriFence = VK_NULL_HANDLE; - { - VK_ANB_DEBUG_OBJ(anbInfoPtr, "trying to get qsri fence"); - AutoLock lock(anbInfo->qsriWaitInfo.lock); - VK_ANB_DEBUG_OBJ(anbInfoPtr, "trying to get qsri fence (got lock)"); - qsriFence = anbInfo->qsriWaitInfo.getFenceFromPoolLocked(); - VK_ANB_DEBUG_OBJ(anbInfoPtr, "got qsri fence %p", qsriFence); - } + VkFence qsriFence = anbInfo->qsriWaitFencePool->getFenceFromPool(); AutoLock qLock(*queueLock); vk->vkQueueSubmit(queueState.queue, 1, &submitInfo, qsriFence); + auto waitForQsriFenceTask = [anbInfoPtr, anbInfo, vk, device = anbInfo->device, qsriFence] { + (void)anbInfoPtr; + VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: enter"); + VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...", qsriFence); + VkResult res = vk->vkWaitForFences(device, 1, &qsriFence, VK_FALSE, kTimeoutNs); + switch (res) { + case VK_SUCCESS: + break; + case VK_TIMEOUT: + VK_ANB_ERR("Timeout when waiting for the Qsri fence."); + break; + default: + GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER)) + << "Fail to wait for the Qsri VkFence: " << res << "(" << string_VkResult(res) + << ")."; + } + VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...(done)", qsriFence); + anbInfo->qsriWaitFencePool->returnFence(qsriFence); + }; fb->unlock(); if (anbInfo->useVulkanNativeImage) { VK_ANB_DEBUG_OBJ(anbInfoPtr, "using native image, so use sync thread to wait"); fb->setColorBufferInUse(anbInfo->colorBufferHandle, false); - VkDevice device = anbInfo->device; // Queue wait to sync thread with completion callback // Pass anbInfo by value to get a ref - SyncThread::get()->triggerSignalVkPresentComplete([anbInfoPtr, anbInfo, vk, device, - qsriFence] { - (void)anbInfoPtr; - VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: enter"); - if (qsriFence) { - VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...", qsriFence); - vk->vkWaitForFences(device, 1, &qsriFence, VK_FALSE, kTimeoutNs); - VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...(done)", qsriFence); - } - AutoLock lock(anbInfo->qsriWaitInfo.lock); - VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: return fence and signal"); - if (qsriFence) { - anbInfo->qsriWaitInfo.returnFenceLocked(qsriFence); - } - uint64_t presentCount = ++anbInfo->qsriWaitInfo.presentCount; - VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: done, present count is now %llu", (unsigned long long)presentCount); - anbInfo->qsriWaitInfo.cv.signal(); - VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: exit"); - }); + SyncThread::get()->triggerGeneral( + [waitForQsriFenceTask = std::move(waitForQsriFenceTask), anbInfo]() mutable { + waitForQsriFenceTask(); + anbInfo->qsriTimeline->signalNextPresentAndPoll(); + }, + "wait for the guest Qsri VkFence signaled"); } else { VK_ANB_DEBUG_OBJ(anbInfoPtr, "not using native image, so wait right away"); - if (qsriFence) { - vk->vkWaitForFences(anbInfo->device, 1, &qsriFence, VK_FALSE, kTimeoutNs); - } + waitForQsriFenceTask(); VkMappedMemoryRange toInvalidate = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 0, @@ -827,14 +838,7 @@ VkResult syncImageToColorBuffer( colorBufferHandle, anbInfo->mappedStagingPtr, bpp * anbInfo->extent.width * anbInfo->extent.height); - - AutoLock lock(anbInfo->qsriWaitInfo.lock); - uint64_t presentCount = ++anbInfo->qsriWaitInfo.presentCount; - VK_ANB_DEBUG_OBJ(anbInfoPtr, "done, present count is now %llu", (unsigned long long)presentCount); - anbInfo->qsriWaitInfo.cv.signal(); - if (qsriFence) { - anbInfo->qsriWaitInfo.returnFenceLocked(qsriFence); - } + anbInfo->qsriTimeline->signalNextPresentAndPoll(); } return VK_SUCCESS; diff --git a/stream-servers/vulkan/VkAndroidNativeBuffer.h b/stream-servers/vulkan/VkAndroidNativeBuffer.h index 8d0f17ee..7fb53996 100644 --- a/stream-servers/vulkan/VkAndroidNativeBuffer.h +++ b/stream-servers/vulkan/VkAndroidNativeBuffer.h @@ -13,19 +13,20 @@ // limitations under the License. #pragma once -#include "VkCommonOperations.h" - #include <vulkan/vulkan.h> -#include "base/Lock.h" -#include "base/ConditionVariable.h" -#include "cereal/common/goldfish_vk_private_defs.h" - #include <atomic> #include <deque> #include <memory> +#include <unordered_set> #include <vector> +#include "VkCommonOperations.h" +#include "VkQsriTimeline.h" +#include "base/ConditionVariable.h" +#include "base/Lock.h" +#include "cereal/common/goldfish_vk_private_defs.h" + namespace goldfish_vk { struct AndroidNativeBufferInfo; @@ -123,40 +124,27 @@ struct AndroidNativeBufferInfo { // State that is of interest when interacting with sync fds and SyncThread. // Protected by this lock and condition variable. - struct QsriWaitInfo { - android::base::Lock lock; - android::base::ConditionVariable cv; - - VulkanDispatch* vk = nullptr; - VkDevice device = VK_NULL_HANDLE; - - // A pool of vkFences for waiting (optimization so we don't keep recreating them every time). - std::vector<VkFence> fencePool; - - // How many times the image was presented via vkQueueSignalReleaseImageANDROID - // versus how many times we want it to be (for sync fd fence waiting). - // Incremented by waitQsri. - std::atomic_uint64_t requestedPresentCount = 0; - // Incremented by waitQsri if vkWaitForFences there succeeds, - // or by syncImageToColorBuffer in the non-zero-copy case. - std::atomic_uint64_t presentCount = 0; - - void ensureDispatchAndDevice(VulkanDispatch* vkIn, VkDevice deviceIn) { - vk = vkIn; - device = deviceIn; - } - - VkFence getFenceFromPoolLocked(); - - // requires fence to be signaled - void returnFenceLocked(VkFence fence) { - fencePool.push_back(fence); - } - - ~QsriWaitInfo(); + class QsriWaitFencePool { + public: + QsriWaitFencePool(VulkanDispatch*, VkDevice); + ~QsriWaitFencePool(); + VkFence getFenceFromPool(); + void returnFence(VkFence fence); + + private: + android::base::Lock mLock; + + VulkanDispatch* mVk; + VkDevice mDevice; + + // A pool of vkFences for waiting (optimization so we don't keep recreating them every + // time). + std::vector<VkFence> mAvailableFences; + std::unordered_set<VkFence> mUsedFences; }; - QsriWaitInfo qsriWaitInfo; + std::unique_ptr<QsriWaitFencePool> qsriWaitFencePool = nullptr; + std::unique_ptr<VkQsriTimeline> qsriTimeline = nullptr; }; VkResult prepareAndroidNativeBufferImage( diff --git a/stream-servers/vulkan/VkDecoderGlobalState.cpp b/stream-servers/vulkan/VkDecoderGlobalState.cpp index f067c7da..3050effd 100644 --- a/stream-servers/vulkan/VkDecoderGlobalState.cpp +++ b/stream-servers/vulkan/VkDecoderGlobalState.cpp @@ -4740,9 +4740,7 @@ public: return vk->vkGetFenceStatus(device, fence); } - VkResult waitQsri(VkImage boxed_image, uint64_t timeout) { - (void)timeout; // TODO - + VkResult registerQsriCallback(VkImage boxed_image, VkQsriTimeline::Callback callback) { AutoLock lock(mLock); VkImage image = unbox_VkImage(boxed_image); @@ -4774,20 +4772,10 @@ public: return VK_SUCCESS; } - AutoLock qsriLock(anbInfo->qsriWaitInfo.lock); - uint64_t targetPresentCount = ++anbInfo->qsriWaitInfo.requestedPresentCount; - - if (mLogging) { - fprintf(stderr, "%s:%p New target present count %llu\n", - __func__, anbInfo.get(), (unsigned long long)targetPresentCount); - } - - anbInfo->qsriWaitInfo.cv.wait(&anbInfo->qsriWaitInfo.lock, [anbInfo, targetPresentCount] { - return targetPresentCount <= anbInfo->qsriWaitInfo.presentCount; - }); + anbInfo->qsriTimeline->registerCallbackForNextPresentAndPoll(std::move(callback)); if (mLogging) { - fprintf(stderr, "%s:%p Done waiting\n", __func__, anbInfo.get()); + fprintf(stderr, "%s:%p Done registering\n", __func__, anbInfo.get()); } return VK_SUCCESS; } @@ -7926,8 +7914,9 @@ VkResult VkDecoderGlobalState::getFenceStatus(VkFence boxed_fence) { return mImpl->getFenceStatus(boxed_fence); } -VkResult VkDecoderGlobalState::waitQsri(VkImage image, uint64_t timeout) { - return mImpl->waitQsri(image, timeout); +VkResult VkDecoderGlobalState::registerQsriCallback(VkImage image, + VkQsriTimeline::Callback callback) { + return mImpl->registerQsriCallback(image, std::move(callback)); } void VkDecoderGlobalState::deviceMemoryTransform_tohost( diff --git a/stream-servers/vulkan/VkDecoderGlobalState.h b/stream-servers/vulkan/VkDecoderGlobalState.h index 48491b7b..a32d4aa4 100644 --- a/stream-servers/vulkan/VkDecoderGlobalState.h +++ b/stream-servers/vulkan/VkDecoderGlobalState.h @@ -13,13 +13,13 @@ // limitations under the License. #pragma once -#include "VulkanHandleMapping.h" -#include "VulkanDispatch.h" - #include <vulkan/vulkan.h> #include <memory> +#include "VkQsriTimeline.h" +#include "VulkanDispatch.h" +#include "VulkanHandleMapping.h" #include "cereal/common/goldfish_vk_private_defs.h" #include "cereal/common/goldfish_vk_transform.h" @@ -791,7 +791,7 @@ public: // presented so far, so it ends up incrementing a "target present count" // for this image, and then waiting for the image to get vkQSRI'ed at least // that many times. - VkResult waitQsri(VkImage boxed_image, uint64_t timeout); + VkResult registerQsriCallback(VkImage boxed_image, VkQsriTimeline::Callback callback); // Transformations void deviceMemoryTransform_tohost( diff --git a/stream-servers/vulkan/VkQsriTimeline.h b/stream-servers/vulkan/VkQsriTimeline.h new file mode 100644 index 00000000..19e8ea0e --- /dev/null +++ b/stream-servers/vulkan/VkQsriTimeline.h @@ -0,0 +1,65 @@ +#ifndef VK_QSRI_TIMELINE_H +#define VK_QSRI_TIMELINE_H + +#include <cstdint> +#include <functional> +#include <map> +#include <mutex> +#include <sstream> + +#include "host-common/logging.h" + +namespace goldfish_vk { +class VkQsriTimeline { + public: + using Callback = std::function<void()>; + + void signalNextPresentAndPoll() { + std::lock_guard<std::mutex> guard(mLock); + mPresentCount++; + pollLocked(); + } + + void registerCallbackForNextPresentAndPoll(Callback callback) { + std::lock_guard<std::mutex> guard(mLock); + uint64_t requestPresentCount = mRequestPresentCount; + mRequestPresentCount++; + mPendingCallbacks.emplace(requestPresentCount, std::move(callback)); + pollLocked(); + } + + VkQsriTimeline() : mPresentCount(0), mRequestPresentCount(0) {} + ~VkQsriTimeline() { + std::lock_guard<std::mutex> guard(mLock); + if (mPendingCallbacks.empty()) { + return; + } + std::stringstream ss; + ss << mPendingCallbacks.size() + << " pending QSRI callbacks found when destroying the timeline, waiting for: "; + for (auto& [requiredPresentCount, callback] : mPendingCallbacks) { + callback(); + ss << requiredPresentCount << ", "; + } + ss << "just call all of callbacks."; + ERR("%s", ss.str().c_str()); + } + + private: + std::map<uint64_t, Callback> mPendingCallbacks; + std::mutex mLock; + uint64_t mPresentCount; + uint64_t mRequestPresentCount; + + void pollLocked() { + auto firstPendingCallback = mPendingCallbacks.lower_bound(mPresentCount); + for (auto readyCallback = mPendingCallbacks.begin(); readyCallback != firstPendingCallback; + readyCallback++) { + readyCallback->second(); + } + mPendingCallbacks.erase(mPendingCallbacks.begin(), firstPendingCallback); + } +}; +} // namespace goldfish_vk + +#endif // VK_QSRI_TIMELINE_H
\ No newline at end of file diff --git a/stream-servers/vulkan/VkQsriTimeline_unittest.cpp b/stream-servers/vulkan/VkQsriTimeline_unittest.cpp new file mode 100644 index 00000000..d5d7af7c --- /dev/null +++ b/stream-servers/vulkan/VkQsriTimeline_unittest.cpp @@ -0,0 +1,42 @@ +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "VkQsriTimeline.h" + +namespace goldfish_vk { +namespace { +using ::testing::InSequence; +using ::testing::MockFunction; + +TEST(VkQsriTImelineTest, signalFirstRegisterCallbackLater) { + MockFunction<void()> mockCallback1, mockCallback2; + VkQsriTimeline qsriTimeline; + { + InSequence s; + EXPECT_CALL(mockCallback1, Call()).Times(1); + EXPECT_CALL(mockCallback2, Call()).Times(1); + } + qsriTimeline.signalNextPresentAndPoll(); + qsriTimeline.signalNextPresentAndPoll(); + qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback1.AsStdFunction()); + qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback2.AsStdFunction()); +} + +TEST(VkQsriTImelineTest, registerCallbackFirstSignalLater) { + MockFunction<void()> mockCallback1, mockCallback2, beforeSignal; + VkQsriTimeline qsriTimeline; + { + InSequence s; + EXPECT_CALL(beforeSignal, Call()).Times(1); + EXPECT_CALL(mockCallback1, Call()).Times(1); + EXPECT_CALL(mockCallback2, Call()).Times(1); + } + qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback1.AsStdFunction()); + qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback2.AsStdFunction()); + beforeSignal.Call(); + qsriTimeline.signalNextPresentAndPoll(); + qsriTimeline.signalNextPresentAndPoll(); +} + +} // namespace +} // namespace goldfish_vk
\ No newline at end of file |