Snap for 8300341 from ea15f40672340474611490d751a8af0913dbe699 to emu-31-release

Change-Id: Iacaf501b954e078de72b2eb4d0e866ab1ad4a13e
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2022-03-21 19:23:16 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2022-03-21 19:23:16 +0000
commit: a6bafe0649b982e0dd433e727630becf2775f116 (patch)
tree: fce235dfe4ce4260278b9eca913106bbb72cf3a5
parent: 829d193568fa069689fcb42535c4d12e50b1e621 (diff)
parent: ea15f40672340474611490d751a8af0913dbe699 (diff)
download: vulkan-cereal-a6bafe0649b982e0dd433e727630becf2775f116.tar.gz
13 files changed, 491 insertions, 608 deletions
diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt
index 978b7214..81a1bccb 100644
--- a/base/CMakeLists.txt
+++ b/base/CMakeLists.txt
@@ -44,22 +44,10 @@ else()
         ${gfxstream-base-posix-sources})
 endif()
 
-if (RECORDER_DELEGATE_LIB)
-    set(gfxstream-base-metrics-sources
-        MetricsRecorderDelegate.cpp)
-    set(gfxstream-base-metrics-link-libraries
-        recorder_delegate_lib)
-else()
-    set(gfxstream-base-metrics-sources
-        MetricsNoOp.cpp)
-    set(gfxstream-base-metrics-link-libraries)
-endif()
-
 add_library(
     gfxstream-base
     ${gfxstream-base-common-sources}
-    ${gfxstream-platform-sources}
-    ${gfxstream-base-metrics-sources})
+    ${gfxstream-platform-sources})
 
 if (WIN32)
     set(gfxstream-base-platform-deps "")
@@ -72,8 +60,7 @@ target_link_libraries(
     PRIVATE
     lz4
     perfetto-tracing-only
-    ${gfxstream-base-platform-deps}
-    ${gfxstream-base-metrics-link-libraries})
+    ${gfxstream-base-platform-deps})
 
 target_include_directories(
     gfxstream-base PUBLIC ${GFXSTREAM_REPO_ROOT})
diff --git a/base/ThreadPool.h b/base/ThreadPool.h
index 9e4aabca..c985c03a 100644
--- a/base/ThreadPool.h
+++ b/base/ThreadPool.h
@@ -14,17 +14,19 @@
 
 #pragma once
 
-#include "base/Compiler.h"
-#include "base/Optional.h"
-#include "base/System.h"
-#include "base/WorkerThread.h"
-
 #include <atomic>
+#include <cstdint>
 #include <functional>
 #include <memory>
+#include <type_traits>
 #include <utility>
 #include <vector>
 
+#include "base/Compiler.h"
+#include "base/Optional.h"
+#include "base/System.h"
+#include "base/WorkerThread.h"
+
 //
 // ThreadPool<Item> - a simple collection of worker threads to process enqueued
 // items on multiple cores.
@@ -58,27 +60,51 @@
 namespace android {
 namespace base {
 
+using ThreadPoolWorkerId = uint32_t;
+
 template <class ItemT>
 class ThreadPool {
     DISALLOW_COPY_AND_ASSIGN(ThreadPool);
 
 public:
     using Item = ItemT;
-    using Worker = WorkerThread<Optional<Item>>;
-    using Processor = std::function<void(Item&&)>;
-
-    ThreadPool(int threads, Processor&& processor)
-        : mProcessor(std::move(processor)) {
+    using WorkerId = ThreadPoolWorkerId;
+    using Processor = std::function<void(Item&&, WorkerId)>;
+
+   private:
+    struct Command {
+        Item mItem;
+        WorkerId mWorkerId;
+
+        Command(Item&& item, WorkerId workerId) : mItem(std::move(item)), mWorkerId(workerId) {}
+        DISALLOW_COPY_AND_ASSIGN(Command);
+        Command(Command&&) = default;
+    };
+    using Worker = WorkerThread<Optional<Command>>;
+
+   public:
+    // Fn is the type of the processor, it can either have 2 parameters: 1 for the Item, 1 for the
+    // WorkerId, or have only 1 Item parameter.
+    template <class Fn, typename = std::enable_if_t<std::is_invocable_v<Fn, Item, WorkerId> ||
+                                                    std::is_invocable_v<Fn, Item>>>
+    ThreadPool(int threads, Fn&& processor) : mProcessor() {
+        if constexpr (std::is_invocable_v<Fn, Item, WorkerId>) {
+            mProcessor = std::move(processor);
+        } else if constexpr (std::is_invocable_v<Fn, Item>) {
+            using namespace std::placeholders;
+            mProcessor = std::bind(std::move(processor), _1);
+        }
         if (threads < 1) {
             threads = android::base::getCpuCoreCount();
         }
         mWorkers = std::vector<Optional<Worker>>(threads);
         for (auto& workerPtr : mWorkers) {
-            workerPtr.emplace([this](Optional<Item>&& item) {
-                if (!item) {
+            workerPtr.emplace([this](Optional<Command>&& commandOpt) {
+                if (!commandOpt) {
                     return Worker::Result::Stop;
                 }
-                mProcessor(std::move(item.value()));
+                Command command = std::move(commandOpt.value());
+                mProcessor(std::move(command.mItem), command.mWorkerId);
                 return Worker::Result::Continue;
             });
         }
@@ -123,47 +149,24 @@ public:
         for (;;) {
             int currentIndex =
                     mNextWorkerIndex.fetch_add(1, std::memory_order_relaxed);
-            auto& workerPtr = mWorkers[currentIndex % mWorkers.size()];
-            if (workerPtr) {
-                workerPtr->enqueue(std::move(item));
-                break;
-            }
-        }
-    }
-
-    void enqueueIndexed(const Item& item) {
-        for (;;) {
-            int currentIndex =
-                    mNextWorkerIndex.fetch_add(1, std::memory_order_relaxed);
             int workerIndex = currentIndex % mWorkers.size();
             auto& workerPtr = mWorkers[workerIndex];
             if (workerPtr) {
-                Item itemCopy = item;
-                itemCopy.setIndex(workerIndex);
-                workerPtr->enqueue(std::move(itemCopy));
+                Command command(std::forward<Item>(item), workerIndex);
+                workerPtr->enqueue(std::move(command));
                 break;
             }
         }
     }
 
-    // Runs the same Item for all workers.
-    void broadcast(const Item& item) {
-        for (auto workerOpt : mWorkers) {
-            if (!workerOpt) continue;
-            Item itemCopy = item;
-            workerOpt->enqueue(std::move(itemCopy));
-        }
-    }
-
-    // broadcast(), except the Item type must support a method to expose the
-    // worker id.
-    void broadcastIndexed(const Item& item) {
+    // The itemFactory will be called multiple times to generate one item for each worker thread.
+    template <class Fn, typename = std::enable_if_t<std::is_invocable_r_v<Item, Fn>>>
+    void broadcast(Fn&& itemFactory) {
         int i = 0;
         for (auto& workerOpt : mWorkers) {
             if (!workerOpt) continue;
-            Item itemCopy = item;
-            itemCopy.setIndex(i);
-            workerOpt->enqueue(std::move(itemCopy));
+            Command command(std::move(itemFactory()), i);
+            workerOpt->enqueue(std::move(command));
             ++i;
         }
     }
diff --git a/host-common/CMakeLists.txt b/host-common/CMakeLists.txt
index c3d62412..423d8fd0 100644
--- a/host-common/CMakeLists.txt
+++ b/host-common/CMakeLists.txt
@@ -10,16 +10,32 @@ else()
         opengl/NativeGpuInfo_linux.cpp)
 endif()
 
+if (RECORDER_DELEGATE_LIB)
+    set(gfxstream-base-metrics-sources
+        ../base/MetricsRecorderDelegate.cpp)
+    set(gfxstream-base-metrics-link-libraries
+        recorder_delegate_lib)
+else()
+    set(gfxstream-base-metrics-sources
+        ../base/MetricsNoOp.cpp)
+    set(gfxstream-base-metrics-link-libraries)
+endif()
+
 # TODO(gregschlom) move this to base
 add_library(logging-base
     STATIC
     logging.cpp
-    GfxstreamFatalError.cpp)
+    GfxstreamFatalError.cpp
+    ${gfxstream-base-metrics-sources})
 target_include_directories(
     logging-base
     PRIVATE
     ${GFXSTREAM_REPO_ROOT}
     ${GFXSTREAM_REPO_ROOT}/include)
+target_link_libraries(
+    logging-base
+    PRIVATE
+    ${gfxstream-base-metrics-link-libraries})
 
 add_library(
     gfxstream-host-common
diff --git a/stream-servers/CMakeLists.txt b/stream-servers/CMakeLists.txt
index 0bf771cb..d7c8cfd1 100644
--- a/stream-servers/CMakeLists.txt
+++ b/stream-servers/CMakeLists.txt
@@ -220,6 +220,7 @@ add_executable(
     tests/DisplayVk_unittest.cpp
     tests/VirtioGpuTimelines_unittest.cpp
     vulkan/vk_util_unittest.cpp
+    vulkan/VkQsriTimeline_unittest.cpp
 )
 target_link_libraries(
     Vulkan_unittests
diff --git a/stream-servers/FrameBuffer.cpp b/stream-servers/FrameBuffer.cpp
index f39b3e10..26d03e9a 100644
--- a/stream-servers/FrameBuffer.cpp
+++ b/stream-servers/FrameBuffer.cpp
@@ -998,15 +998,15 @@ WorkerProcessingResult FrameBuffer::postWorkerFunc(Post& post) {
                                       post.composeBuffer.size(),
                                       std::move(post.composeCallback));
             } else {
-                auto composeCallback =
-                    std::make_shared<Post::ComposeCallback>(
-                        [composeCallback = std::move(post.composeCallback)]
-                            (std::shared_future<void> waitForGpu) {
-                                SyncThread::get()->triggerGeneral(
-                                    [composeCallback = std::move(composeCallback), waitForGpu]{
-                                        (*composeCallback)(waitForGpu);
-                                    });
-                            });
+                auto composeCallback = std::make_shared<Post::ComposeCallback>(
+                    [composeCallback =
+                         std::move(post.composeCallback)](std::shared_future<void> waitForGpu) {
+                        SyncThread::get()->triggerGeneral(
+                            [composeCallback = std::move(composeCallback), waitForGpu] {
+                                (*composeCallback)(waitForGpu);
+                            },
+                            "Wait for host composition");
+                    });
                 m_postWorker->compose(
                     (ComposeDevice_v2*)post.composeBuffer.data(),
                     post.composeBuffer.size(), std::move(composeCallback));
diff --git a/stream-servers/SyncThread.cpp b/stream-servers/SyncThread.cpp
index 548fcfd8..fbad448f 100644
--- a/stream-servers/SyncThread.cpp
+++ b/stream-servers/SyncThread.cpp
@@ -98,13 +98,10 @@ static const uint64_t kDefaultTimeoutNsecs = 5ULL * 1000ULL * 1000ULL * 1000ULL;
 
 SyncThread::SyncThread(bool noGL)
     : android::base::Thread(android::base::ThreadFlags::MaskSignals, 512 * 1024),
-      mWorkerThreadPool(kNumWorkerThreads, [this](SyncThreadCmd&& cmd) { doSyncThreadCmd(&cmd); }),
-      mSignalPresentCompleteWorkerThreadPool(
-          kNumWorkerThreads, [this](SyncThreadCmd&& cmd) { doSyncThreadCmd(&cmd); }),
+      mWorkerThreadPool(kNumWorkerThreads, doSyncThreadCmd),
       mNoGL(noGL) {
     this->start();
     mWorkerThreadPool.start();
-    mSignalPresentCompleteWorkerThreadPool.start();
     if (!noGL) {
         initSyncEGLContext();
     }
@@ -116,106 +113,97 @@ SyncThread::~SyncThread() {
 
 void SyncThread::triggerWait(FenceSync* fenceSync,
                              uint64_t timeline) {
-    DPRINT("fenceSyncInfo=0x%llx timeline=0x%lx ...",
-            fenceSync, timeline);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_WAIT;
-    to_send.fenceSync = fenceSync;
-    to_send.timeline = timeline;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAsync(to_send);
-    DPRINT("exit");
+    std::stringstream ss;
+    ss << "triggerWait fenceSyncInfo=0x" << std::hex << reinterpret_cast<uintptr_t>(fenceSync)
+       << " timeline=0x" << std::hex << timeline;
+    sendAsync(
+        [fenceSync, timeline, this](WorkerId) {
+            doSyncWait(fenceSync, [timeline] {
+                DPRINT("wait done (with fence), use goldfish sync timeline inc");
+                emugl::emugl_sync_timeline_inc(timeline, kTimelineInterval);
+            });
+        },
+        ss.str());
 }
 
 void SyncThread::triggerWaitVk(VkFence vkFence, uint64_t timeline) {
-    DPRINT("fenceSyncInfo=0x%llx timeline=0x%lx ...", fenceSync, timeline);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_WAIT_VK;
-    to_send.vkFence = vkFence;
-    to_send.timeline = timeline;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAsync(to_send);
-    DPRINT("exit");
+    std::stringstream ss;
+    ss << "triggerWaitVk vkFence=0x" << std::hex << reinterpret_cast<uintptr_t>(vkFence)
+       << " timeline=0x" << std::hex << timeline;
+    sendAsync(
+        [vkFence, timeline](WorkerId) {
+            doSyncWaitVk(vkFence, [timeline] {
+                DPRINT("vk wait done, use goldfish sync timeline inc");
+                emugl::emugl_sync_timeline_inc(timeline, kTimelineInterval);
+            });
+        },
+        ss.str());
 }
 
 void SyncThread::triggerBlockedWaitNoTimeline(FenceSync* fenceSync) {
-    DPRINT("fenceSyncInfo=0x%llx ...", fenceSync);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE;
-    to_send.fenceSync = fenceSync;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAndWaitForResult(to_send);
-    DPRINT("exit");
+    std::stringstream ss;
+    ss << "triggerBlockedWaitNoTimeline fenceSyncInfo=0x" << std::hex
+       << reinterpret_cast<uintptr_t>(fenceSync);
+    sendAndWaitForResult(
+        [fenceSync, this](WorkerId) {
+            doSyncWait(fenceSync, std::function<void()>());
+            return 0;
+        },
+        ss.str());
 }
 
 void SyncThread::triggerWaitWithCompletionCallback(FenceSync* fenceSync, FenceCompletionCallback cb) {
-    DPRINT("fenceSyncInfo=0x%llx ...", fenceSync);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_WAIT;
-    to_send.fenceSync = fenceSync;
-    to_send.useFenceCompletionCallback = true;
-    to_send.fenceCompletionCallback = cb;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAsync(to_send);
-    DPRINT("exit");
+    std::stringstream ss;
+    ss << "triggerWaitWithCompletionCallback fenceSyncInfo=0x" << std::hex
+       << reinterpret_cast<uintptr_t>(fenceSync);
+    sendAsync(
+        [fenceSync, cb = std::move(cb), this](WorkerId) { doSyncWait(fenceSync, std::move(cb)); },
+        ss.str());
 }
 
 
 void SyncThread::triggerWaitVkWithCompletionCallback(VkFence vkFence, FenceCompletionCallback cb) {
-    DPRINT("fenceSyncInfo=0x%llx ...", fenceSync);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_WAIT_VK;
-    to_send.vkFence = vkFence;
-    to_send.useFenceCompletionCallback = true;
-    to_send.fenceCompletionCallback = cb;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAsync(to_send);
-    DPRINT("exit");
+    std::stringstream ss;
+    ss << "triggerWaitVkWithCompletionCallback vkFence=0x" << std::hex
+       << reinterpret_cast<uintptr_t>(vkFence);
+    sendAsync([vkFence, cb = std::move(cb)](WorkerId) { doSyncWaitVk(vkFence, std::move(cb)); },
+              ss.str());
 }
 
 void SyncThread::triggerWaitVkQsriWithCompletionCallback(VkImage vkImage, FenceCompletionCallback cb) {
-    DPRINT("fenceSyncInfo=0x%llx ...", fenceSync);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_WAIT_VK_QSRI;
-    to_send.vkImage = vkImage;
-    to_send.useFenceCompletionCallback = true;
-    to_send.fenceCompletionCallback = cb;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAsync(to_send);
-    DPRINT("exit");
+    std::stringstream ss;
+    ss << "triggerWaitVkQsriWithCompletionCallback vkImage=0x"
+       << reinterpret_cast<uintptr_t>(vkImage);
+    sendAsync(
+        [vkImage, cb = std::move(cb)](WorkerId) {
+            auto decoder = goldfish_vk::VkDecoderGlobalState::get();
+            decoder->registerQsriCallback(vkImage, std::move(cb));
+        },
+        ss.str());
 }
 
-void SyncThread::triggerWaitVkQsriBlockedNoTimeline(VkImage vkImage) {
-    DPRINT("fenceSyncInfo=0x%llx ...", fenceSync);
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_WAIT_VK_QSRI;
-    to_send.vkImage = vkImage;
-    DPRINT("opcode=%u", to_send.opCode);
-    sendAndWaitForResult(to_send);
-    DPRINT("exit");
-}
-
-void SyncThread::triggerGeneral(FenceCompletionCallback cb) {
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_GENERAL;
-    to_send.useFenceCompletionCallback = true;
-    to_send.fenceCompletionCallback = cb;
-    sendAsync(to_send);
-}
-
-void SyncThread::triggerSignalVkPresentComplete(FenceCompletionCallback cb) {
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_GENERAL;
-    to_send.useFenceCompletionCallback = true;
-    to_send.fenceCompletionCallback = cb;
-    mSignalPresentCompleteWorkerThreadPool.enqueue(std::move(to_send));
+void SyncThread::triggerGeneral(FenceCompletionCallback cb, std::string description) {
+    std::stringstream ss;
+    ss << "triggerGeneral: " << description;
+    sendAsync(std::bind(std::move(cb)), ss.str());
 }
 
 void SyncThread::cleanup() {
-    DPRINT("enter");
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_EXIT;
-    sendAndWaitForResult(to_send);
+    sendAndWaitForResult(
+        [this](WorkerId workerId) {
+            if (!mNoGL) {
+                const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get();
+
+                egl->eglMakeCurrent(mDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+
+                egl->eglDestroyContext(mDisplay, mContext[workerId]);
+                egl->eglDestroySurface(mDisplay, mSurface[workerId]);
+                mContext[workerId] = EGL_NO_CONTEXT;
+                mSurface[workerId] = EGL_NO_SURFACE;
+            }
+            return 0;
+        },
+        "cleanup");
     DPRINT("signal");
     mLock.lock();
     mExiting = true;
@@ -230,15 +218,6 @@ void SyncThread::cleanup() {
 
 // Private methods below////////////////////////////////////////////////////////
 
-void SyncThread::initSyncEGLContext() {
-    DPRINT("enter");
-    SyncThreadCmd to_send;
-    to_send.opCode = SYNC_THREAD_EGL_INIT;
-    mWorkerThreadPool.broadcastIndexed(to_send);
-    mWorkerThreadPool.waitAllItems();
-    DPRINT("exit");
-}
-
 intptr_t SyncThread::main() {
     DPRINT("in sync thread");
     mLock.lock();
@@ -246,96 +225,96 @@ intptr_t SyncThread::main() {
 
     mWorkerThreadPool.done();
     mWorkerThreadPool.join();
-    mSignalPresentCompleteWorkerThreadPool.done();
-    mSignalPresentCompleteWorkerThreadPool.join();
     DPRINT("exited sync thread");
     return 0;
 }
 
-int SyncThread::sendAndWaitForResult(SyncThreadCmd& cmd) {
-    DPRINT("send with opcode=%d", cmd.opCode);
-    android::base::Lock lock;
-    android::base::ConditionVariable cond;
-    android::base::Optional<int> result = android::base::kNullopt;
-    cmd.lock = &lock;
-    cmd.cond = &cond;
-    cmd.result = &result;
-
-    lock.lock();
-    mWorkerThreadPool.enqueue(std::move(cmd));
-    cond.wait(&lock, [&result] { return result.hasValue(); });
-
-    DPRINT("result=%d", *result);
-    int final_result = *result;
-
-    // Clear these to prevent dangling pointers after we return.
-    cmd.lock = nullptr;
-    cmd.cond = nullptr;
-    cmd.result = nullptr;
-    return final_result;
-}
-
-void SyncThread::sendAsync(SyncThreadCmd& cmd) {
-    DPRINT("send with opcode=%u fenceSyncInfo=0x%llx",
-           cmd.opCode, cmd.fenceSync);
-    mWorkerThreadPool.enqueue(std::move(cmd));
-}
-
-void SyncThread::doSyncEGLContextInit(SyncThreadCmd* cmd) {
-    DPRINT("for worker id: %d", cmd->workerId);
-    // We shouldn't initialize EGL context, when SyncThread is initialized
-    // without GL enabled.
-    SYNC_THREAD_CHECK(!mNoGL);
-
-    const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get();
-
-    mDisplay = egl->eglGetDisplay(EGL_DEFAULT_DISPLAY);
-    int eglMaj, eglMin;
-    egl->eglInitialize(mDisplay, &eglMaj , &eglMin);
-
-    const EGLint configAttribs[] = {
-        EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
-        EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
-        EGL_RED_SIZE, 8,
-        EGL_GREEN_SIZE, 8,
-        EGL_BLUE_SIZE, 8,
-        EGL_NONE,
+int SyncThread::sendAndWaitForResult(std::function<int(WorkerId)> job, std::string description) {
+    DPRINT("sendAndWaitForResult task(%s)", description.c_str());
+    std::packaged_task<int(WorkerId)> task(std::move(job));
+    std::future<int> resFuture = task.get_future();
+    Command command = {
+        .mTask = std::move(task),
+        .mDescription = std::move(description),
     };
 
-    EGLint nConfigs;
-    EGLConfig config;
-
-    egl->eglChooseConfig(mDisplay, configAttribs, &config, 1, &nConfigs);
-
-    const EGLint pbufferAttribs[] = {
-        EGL_WIDTH, 1,
-        EGL_HEIGHT, 1,
-        EGL_NONE,
-    };
-
-    mSurface[cmd->workerId] =
-        egl->eglCreatePbufferSurface(mDisplay, config, pbufferAttribs);
+    mWorkerThreadPool.enqueue(std::move(command));
+    auto res = resFuture.get();
+    DPRINT("exit");
+    return res;
+}
 
-    const EGLint contextAttribs[] = { EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE };
-    mContext[cmd->workerId] = egl->eglCreateContext(mDisplay, config, EGL_NO_CONTEXT, contextAttribs);
+void SyncThread::sendAsync(std::function<void(WorkerId)> job, std::string description) {
+    DPRINT("send task(%s)", description.c_str());
+    mWorkerThreadPool.enqueue(Command{
+        .mTask =
+            std::packaged_task<int(WorkerId)>([job = std::move(job)](WorkerId workerId) mutable {
+                job(workerId);
+                return 0;
+            }),
+        .mDescription = std::move(description),
+    });
+    DPRINT("exit");
+}
 
-    egl->eglMakeCurrent(mDisplay, mSurface[cmd->workerId], mSurface[cmd->workerId], mContext[cmd->workerId]);
+void SyncThread::initSyncEGLContext() {
+    mWorkerThreadPool.broadcast([this] {
+        return Command{
+            .mTask = std::packaged_task<int(WorkerId)>([this](WorkerId workerId) {
+                DPRINT("for worker id: %d", workerId);
+                // We shouldn't initialize EGL context, when SyncThread is initialized
+                // without GL enabled.
+                SYNC_THREAD_CHECK(!mNoGL);
+
+                const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get();
+
+                mDisplay = egl->eglGetDisplay(EGL_DEFAULT_DISPLAY);
+                int eglMaj, eglMin;
+                egl->eglInitialize(mDisplay, &eglMaj, &eglMin);
+
+                const EGLint configAttribs[] = {
+                    EGL_SURFACE_TYPE,
+                    EGL_PBUFFER_BIT,
+                    EGL_RENDERABLE_TYPE,
+                    EGL_OPENGL_ES2_BIT,
+                    EGL_RED_SIZE,
+                    8,
+                    EGL_GREEN_SIZE,
+                    8,
+                    EGL_BLUE_SIZE,
+                    8,
+                    EGL_NONE,
+                };
+
+                EGLint nConfigs;
+                EGLConfig config;
+
+                egl->eglChooseConfig(mDisplay, configAttribs, &config, 1, &nConfigs);
+
+                const EGLint pbufferAttribs[] = {
+                    EGL_WIDTH, 1, EGL_HEIGHT, 1, EGL_NONE,
+                };
+
+                mSurface[workerId] = egl->eglCreatePbufferSurface(mDisplay, config, pbufferAttribs);
+
+                const EGLint contextAttribs[] = {EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE};
+                mContext[workerId] =
+                    egl->eglCreateContext(mDisplay, config, EGL_NO_CONTEXT, contextAttribs);
+
+                egl->eglMakeCurrent(mDisplay, mSurface[workerId], mSurface[workerId],
+                                    mContext[workerId]);
+                return 0;
+            }),
+            .mDescription = "init sync EGL context",
+        };
+    });
 }
 
-void SyncThread::doSyncWait(SyncThreadCmd* cmd) {
+void SyncThread::doSyncWait(FenceSync* fenceSync, std::function<void()> onComplete) {
     DPRINT("enter");
 
-    FenceSync* fenceSync =
-        FenceSync::getFromHandle((uint64_t)(uintptr_t)cmd->fenceSync);
-
-    if (!fenceSync) {
-        if (cmd->useFenceCompletionCallback) {
-            DPRINT("wait done (null fence), use completion callback");
-            cmd->fenceCompletionCallback();
-        } else {
-            DPRINT("wait done (null fence), use sync timeline inc");
-            emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval);
-        }
+    if (!FenceSync::getFromHandle((uint64_t)(uintptr_t)fenceSync) && onComplete) {
+        onComplete();
         return;
     }
     // We shouldn't use FenceSync to wait, when SyncThread is initialized
@@ -344,16 +323,18 @@ void SyncThread::doSyncWait(SyncThreadCmd* cmd) {
 
     EGLint wait_result = 0x0;
 
-    DPRINT("wait on sync obj: %p", cmd->fenceSync);
-    wait_result = cmd->fenceSync->wait(kDefaultTimeoutNsecs);
+    DPRINT("wait on sync obj: %p", fenceSync);
+    wait_result = fenceSync->wait(kDefaultTimeoutNsecs);
 
     DPRINT("done waiting, with wait result=0x%x. "
            "increment timeline (and signal fence)",
            wait_result);
 
     if (wait_result != EGL_CONDITION_SATISFIED_KHR) {
-        DPRINT("error: eglClientWaitSync abnormal exit 0x%x. sync handle 0x%llx\n",
-               wait_result, (unsigned long long)cmd->fenceSync);
+        EGLint error = s_egl.eglGetError();
+        DPRINT("error: eglClientWaitSync abnormal exit 0x%x. sync handle 0x%llx. egl error = %#x\n",
+               wait_result, (unsigned long long)fenceSync, error);
+        (void)error;
     }
 
     DPRINT("issue timeline increment");
@@ -382,12 +363,8 @@ void SyncThread::doSyncWait(SyncThreadCmd* cmd) {
     //   incrementing the timeline means that the app's rendering freezes.
     //   So, despite the faulty GPU driver, not incrementing is too heavyweight a response.
 
-    if (cmd->useFenceCompletionCallback) {
-        DPRINT("wait done (with fence), use completion callback");
-        cmd->fenceCompletionCallback();
-    } else {
-        DPRINT("wait done (with fence), use goldfish sync timeline inc");
-        emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval);
+    if (onComplete) {
+        onComplete();
     }
     FenceSync::incrementTimelineAndDeleteOldFences();
 
@@ -396,15 +373,15 @@ void SyncThread::doSyncWait(SyncThreadCmd* cmd) {
     DPRINT("exit");
 }
 
-int SyncThread::doSyncWaitVk(SyncThreadCmd* cmd) {
+int SyncThread::doSyncWaitVk(VkFence vkFence, std::function<void()> onComplete) {
     DPRINT("enter");
 
     auto decoder = goldfish_vk::VkDecoderGlobalState::get();
-    auto result = decoder->waitForFence(cmd->vkFence, kDefaultTimeoutNsecs);
+    auto result = decoder->waitForFence(vkFence, kDefaultTimeoutNsecs);
     if (result == VK_TIMEOUT) {
-        DPRINT("SYNC_WAIT_VK timeout: vkFence=%p", cmd->vkFence);
+        DPRINT("SYNC_WAIT_VK timeout: vkFence=%p", vkFence);
     } else if (result != VK_SUCCESS) {
-        DPRINT("SYNC_WAIT_VK error: %d vkFence=%p", result, cmd->vkFence);
+        DPRINT("SYNC_WAIT_VK error: %d vkFence=%p", result, vkFence);
     }
 
     DPRINT("issue timeline increment");
@@ -412,12 +389,8 @@ int SyncThread::doSyncWaitVk(SyncThreadCmd* cmd) {
     // We always unconditionally increment timeline at this point, even
     // if the call to vkWaitForFences returned abnormally.
     // See comments in |doSyncWait| about the rationale.
-    if (cmd->useFenceCompletionCallback) {
-        DPRINT("vk wait done, use completion callback");
-        cmd->fenceCompletionCallback();
-    } else {
-        DPRINT("vk wait done, use goldfish sync timeline inc");
-        emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval);
+    if (onComplete) {
+        onComplete();
     }
 
     DPRINT("done timeline increment");
@@ -426,148 +399,9 @@ int SyncThread::doSyncWaitVk(SyncThreadCmd* cmd) {
     return result;
 }
 
-int SyncThread::doSyncWaitVkQsri(SyncThreadCmd* cmd) {
-    DPRINT("enter");
-
-    auto decoder = goldfish_vk::VkDecoderGlobalState::get();
-    DPRINT("doSyncWaitVkQsri for image %p", cmd->vkImage);
-    auto result = decoder->waitQsri(cmd->vkImage, kDefaultTimeoutNsecs);
-    DPRINT("doSyncWaitVkQsri for image %p (done, do signal/callback)", cmd->vkImage);
-    if (result == VK_TIMEOUT) {
-        fprintf(stderr, "SyncThread::%s: SYNC_WAIT_VK_QSRI timeout: vkImage=%p\n",
-                __func__, cmd->vkImage);
-    } else if (result != VK_SUCCESS) {
-        fprintf(stderr, "SyncThread::%s: SYNC_WAIT_VK_QSRI error: %d vkImage=%p\n",
-                __func__, result, cmd->vkImage);
-    }
-
-    DPRINT("issue timeline increment");
-
-    // We always unconditionally increment timeline at this point, even
-    // if the call to vkWaitForFences returned abnormally.
-    // See comments in |doSyncWait| about the rationale.
-    if (cmd->useFenceCompletionCallback) {
-        DPRINT("wait done, use completion callback");
-        cmd->fenceCompletionCallback();
-    } else {
-        DPRINT("wait done, use goldfish sync timeline inc");
-        emugl::emugl_sync_timeline_inc(cmd->timeline, kTimelineInterval);
-    }
-
-    DPRINT("done timeline increment");
-
-    DPRINT("exit");
-    return result;
-}
-
-int SyncThread::doSyncGeneral(SyncThreadCmd* cmd) {
-    DPRINT("enter");
-    if (cmd->useFenceCompletionCallback) {
-        DPRINT("wait done, use completion callback");
-        cmd->fenceCompletionCallback();
-    } else {
-        DPRINT("warning, completion callback not provided in general op!");
-    }
-
-    return 0;
-}
-
-void SyncThread::doSyncBlockedWaitNoTimeline(SyncThreadCmd* cmd) {
-    DPRINT("enter");
-
-    FenceSync* fenceSync =
-        FenceSync::getFromHandle((uint64_t)(uintptr_t)cmd->fenceSync);
-
-    if (!fenceSync) {
-        return;
-    }
-
-    // We shouldn't use FenceSync to wait, when SyncThread is initialized
-    // without GL enabled, because FenceSync uses EGL/GLES.
-    SYNC_THREAD_CHECK(!mNoGL);
-
-    EGLint wait_result = 0x0;
-
-    DPRINT("wait on sync obj: %p", cmd->fenceSync);
-    wait_result = cmd->fenceSync->wait(kDefaultTimeoutNsecs);
-
-    DPRINT("done waiting, with wait result=0x%x. "
-           "increment timeline (and signal fence)",
-           wait_result);
-
-    if (wait_result != EGL_CONDITION_SATISFIED_KHR) {
-        EGLint error = s_egl.eglGetError();
-        fprintf(stderr, "error: eglClientWaitSync abnormal exit 0x%x %p %#x\n",
-                wait_result, cmd->fenceSync, error);
-    }
-
-    FenceSync::incrementTimelineAndDeleteOldFences();
-}
-
-void SyncThread::doExit(SyncThreadCmd* cmd) {
-    if (!mNoGL) {
-        const EGLDispatch* egl = emugl::LazyLoadedEGLDispatch::get();
-
-        egl->eglMakeCurrent(mDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE,
-                            EGL_NO_CONTEXT);
-
-        egl->eglDestroyContext(mDisplay, mContext[cmd->workerId]);
-        egl->eglDestroySurface(mDisplay, mSurface[cmd->workerId]);
-        mContext[cmd->workerId] = EGL_NO_CONTEXT;
-        mSurface[cmd->workerId] = EGL_NO_SURFACE;
-    }
-}
-
-int SyncThread::doSyncThreadCmd(SyncThreadCmd* cmd) {
-#if DEBUG
-    thread_local static auto threadId = android::base::getCurrentThreadId();
-    thread_local static size_t numCommands = 0U;
-    DPRINT("threadId = %lu numCommands = %lu cmd = %p", threadId, ++numCommands,
-           cmd);
-#endif  // DEBUG
-
-    int result = 0;
-    switch (cmd->opCode) {
-    case SYNC_THREAD_EGL_INIT:
-        DPRINT("exec SYNC_THREAD_EGL_INIT");
-        doSyncEGLContextInit(cmd);
-        break;
-    case SYNC_THREAD_WAIT:
-        DPRINT("exec SYNC_THREAD_WAIT");
-        doSyncWait(cmd);
-        break;
-    case SYNC_THREAD_WAIT_VK:
-        DPRINT("exec SYNC_THREAD_WAIT_VK");
-        result = doSyncWaitVk(cmd);
-        break;
-    case SYNC_THREAD_WAIT_VK_QSRI:
-        DPRINT("exec SYNC_THREAD_WAIT_VK_QSRI");
-        result = doSyncWaitVkQsri(cmd);
-        break;
-    case SYNC_THREAD_GENERAL:
-        DPRINT("exec SYNC_THREAD_GENERAL");
-        result = doSyncGeneral(cmd);
-        break;
-    case SYNC_THREAD_EXIT:
-        DPRINT("exec SYNC_THREAD_EXIT");
-        doExit(cmd);
-        break;
-    case SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE:
-        DPRINT("exec SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE");
-        doSyncBlockedWaitNoTimeline(cmd);
-        break;
-    }
-
-    bool need_reply = cmd->lock != nullptr && cmd->cond != nullptr;
-    if (need_reply) {
-        cmd->lock->lock();
-        *cmd->result = android::base::makeOptional(result);
-        cmd->cond->signalAndUnlock(cmd->lock);
-    }
-    return result;
-}
-
 /* static */
+void SyncThread::doSyncThreadCmd(Command&& command, WorkerId workerId) { command.mTask(workerId); }
+
 SyncThread* SyncThread::get() {
     auto res = sGlobalSyncThread()->syncThreadPtr();
     SYNC_THREAD_CHECK(res);
diff --git a/stream-servers/SyncThread.h b/stream-servers/SyncThread.h
index 24f6f00e..0adbb0f3 100644
--- a/stream-servers/SyncThread.h
+++ b/stream-servers/SyncThread.h
@@ -16,70 +16,29 @@
 
 #pragma once
 
-#include "FenceSync.h"
-
 #include <EGL/egl.h>
 #include <GLES2/gl2.h>
 #include <GLES2/gl2ext.h>
 
-#include "base/Optional.h"
+#include <functional>
+#include <future>
+#include <string>
+#include <type_traits>
+
+#include "FenceSync.h"
 #include "base/ConditionVariable.h"
 #include "base/Lock.h"
+#include "base/MessageChannel.h"
+#include "base/Optional.h"
 #include "base/Thread.h"
 #include "base/ThreadPool.h"
-#include "base/MessageChannel.h"
-#include "vulkan/VkDecoderGlobalState.h"
 #include "virtio_gpu_ops.h"
+#include "vulkan/VkDecoderGlobalState.h"
 
 // SyncThread///////////////////////////////////////////////////////////////////
 // The purpose of SyncThread is to track sync device timelines and give out +
 // signal FD's that correspond to the completion of host-side GL fence commands.
 
-// We communicate with the sync thread in 3 ways:
-enum SyncThreadOpCode {
-    // Nonblocking command to initialize sync thread's EGL contexts for sync
-    // operations
-    SYNC_THREAD_EGL_INIT = 0,
-    // Nonblocking command to wait on a given FenceSync object
-    // and timeline handle.
-    // A fence FD object in the guest is signaled.
-    SYNC_THREAD_WAIT = 1,
-    // Blocking command to clean up and exit the sync thread.
-    SYNC_THREAD_EXIT = 2,
-    // Blocking command to wait on a given FenceSync object.
-    // No timeline handling is done.
-    SYNC_THREAD_BLOCKED_WAIT_NO_TIMELINE = 3,
-    // Nonblocking command to wait on a given VkFence
-    // and timeline handle.
-    // A fence FD object / Zircon eventpair in the guest is signaled.
-    SYNC_THREAD_WAIT_VK = 4,
-    // Command to wait on the presentation the given VkImage.
-    SYNC_THREAD_WAIT_VK_QSRI = 5,
-    // Command that consists only of a callback.
-    SYNC_THREAD_GENERAL = 6,
-};
-
-struct SyncThreadCmd {
-    // For use with initialization in multiple thread pools.
-    int workerId = 0;
-    // For use with ThreadPool::broadcastIndexed
-    void setIndex(int id) { workerId = id; }
-
-    SyncThreadOpCode opCode = SYNC_THREAD_EGL_INIT;
-    union {
-        FenceSync* fenceSync = nullptr;
-        VkFence vkFence;
-        VkImage vkImage;
-    };
-    uint64_t timeline = 0;
-
-    android::base::Lock* lock = nullptr;
-    android::base::ConditionVariable* cond = nullptr;
-    android::base::Optional<int>* result = nullptr;
-
-    bool useFenceCompletionCallback = false;
-    FenceCompletionCallback fenceCompletionCallback;;
-};
 
 struct RenderThreadInfo;
 class SyncThread : public android::base::Thread {
@@ -117,12 +76,7 @@ public:
     void triggerWaitWithCompletionCallback(FenceSync* fenceSync, FenceCompletionCallback);
     void triggerWaitVkWithCompletionCallback(VkFence fenceHandle, FenceCompletionCallback);
     void triggerWaitVkQsriWithCompletionCallback(VkImage image, FenceCompletionCallback);
-    void triggerWaitVkQsriBlockedNoTimeline(VkImage image);
-    // Similar to triggerGeneral, but will dispatch the task to the dedicated
-    // signalPresentCompleteWorkerThreadPool.
-    void triggerSignalVkPresentComplete(FenceCompletionCallback);
-
-    void triggerGeneral(FenceCompletionCallback);
+    void triggerGeneral(FenceCompletionCallback, std::string description);
 
     // |cleanup|: for use with destructors and other cleanup functions.
     // it destroys the sync context and exits the sync thread.
@@ -141,6 +95,13 @@ public:
     static void destroy();
 
    private:
+    using WorkerId = android::base::ThreadPoolWorkerId;
+    struct Command {
+        std::packaged_task<int(WorkerId)> mTask;
+        std::string mDescription;
+    };
+    using ThreadPool = android::base::ThreadPool<Command>;
+
     // |initSyncContext| creates an EGL context expressly for calling
     // eglClientWaitSyncKHR in the processing caused by |triggerWait|.
     // This is used by the constructor only. It is non-blocking.
@@ -151,25 +112,19 @@ public:
     // It keeps the workers runner until |mExiting| is set.
     virtual intptr_t main() override final;
 
-    // These two functions are used to communicate with the sync thread
-    // from another thread:
-    // - |sendAndWaitForResult| issues |cmd| to the sync thread,
-    //   and blocks until it receives the result of the command.
-    // - |sendAsync| issues |cmd| to the sync thread and does not
-    //   wait for the result, returning immediately after.
-    int sendAndWaitForResult(SyncThreadCmd& cmd);
-    void sendAsync(SyncThreadCmd& cmd);
-
-    // |doSyncThreadCmd| and related functions below
-    // execute the actual commands. These run on the sync thread.
-    int doSyncThreadCmd(SyncThreadCmd* cmd);
-    void doSyncEGLContextInit(SyncThreadCmd* cmd);
-    void doSyncWait(SyncThreadCmd* cmd);
-    int doSyncWaitVk(SyncThreadCmd* cmd);
-    int doSyncWaitVkQsri(SyncThreadCmd* cmd);
-    int doSyncGeneral(SyncThreadCmd* cmd);
-    void doSyncBlockedWaitNoTimeline(SyncThreadCmd* cmd);
-    void doExit(SyncThreadCmd* cmd);
+    // These two functions are used to communicate with the sync thread from another thread:
+    // - |sendAndWaitForResult| issues |job| to the sync thread, and blocks until it receives the
+    // result of the job.
+    // - |sendAsync| issues |job| to the sync thread and does not wait for the result, returning
+    // immediately after.
+    int sendAndWaitForResult(std::function<int(WorkerId)> job, std::string description);
+    void sendAsync(std::function<void(WorkerId)> job, std::string description);
+
+    // |doSyncThreadCmd| execute the actual task. These run on the sync thread.
+    static void doSyncThreadCmd(Command&& command, ThreadPool::WorkerId);
+
+    void doSyncWait(FenceSync* fenceSync, std::function<void()> onComplete);
+    static int doSyncWaitVk(VkFence, std::function<void()> onComplete);
 
     // EGL objects / object handles specific to
     // a sync thread.
@@ -182,8 +137,7 @@ public:
     bool mExiting = false;
     android::base::Lock mLock;
     android::base::ConditionVariable mCv;
-    android::base::ThreadPool<SyncThreadCmd> mWorkerThreadPool;
-    android::base::ThreadPool<SyncThreadCmd> mSignalPresentCompleteWorkerThreadPool;
+    ThreadPool mWorkerThreadPool;
     bool mNoGL;
 };
 
diff --git a/stream-servers/vulkan/VkAndroidNativeBuffer.cpp b/stream-servers/vulkan/VkAndroidNativeBuffer.cpp
index 0512caef..96b33aae 100644
--- a/stream-servers/vulkan/VkAndroidNativeBuffer.cpp
+++ b/stream-servers/vulkan/VkAndroidNativeBuffer.cpp
@@ -12,17 +12,19 @@
 // limitations under the License.
 #include "VkAndroidNativeBuffer.h"
 
-#include "cereal/common/goldfish_vk_private_defs.h"
-#include "cereal/common/goldfish_vk_extension_structs.h"
+#include <string.h>
+
+#include <future>
 
-#include "host-common/GfxstreamFatalError.h"
-#include "stream-servers/FrameBuffer.h"
 #include "GrallocDefs.h"
+#include "SyncThread.h"
 #include "VkCommonOperations.h"
 #include "VulkanDispatch.h"
-#include "SyncThread.h"
-
-#include <string.h>
+#include "cereal/common/goldfish_vk_extension_structs.h"
+#include "cereal/common/goldfish_vk_private_defs.h"
+#include "host-common/GfxstreamFatalError.h"
+#include "stream-servers/FrameBuffer.h"
+#include "vulkan/vk_enum_string_helper.h"
 
 #define VK_ANB_ERR(fmt,...) fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, ##__VA_ARGS__);
 
@@ -43,40 +45,58 @@ using emugl::FatalError;
 
 namespace goldfish_vk {
 
-VkFence AndroidNativeBufferInfo::QsriWaitInfo::getFenceFromPoolLocked() {
-    VK_ANB_DEBUG("enter");
-
-    if (!vk) return VK_NULL_HANDLE;
+AndroidNativeBufferInfo::QsriWaitFencePool::QsriWaitFencePool(VulkanDispatch* vk, VkDevice device)
+    : mVk(vk), mDevice(device) {}
 
-    if (fencePool.empty()) {
-        VkFence fence;
+VkFence AndroidNativeBufferInfo::QsriWaitFencePool::getFenceFromPool() {
+    VK_ANB_DEBUG("enter");
+    AutoLock lock(mLock);
+    VkFence fence = VK_NULL_HANDLE;
+    if (mAvailableFences.empty()) {
         VkFenceCreateInfo fenceCreateInfo = {
             VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, 0, 0,
         };
-        vk->vkCreateFence(device, &fenceCreateInfo, nullptr, &fence);
+        mVk->vkCreateFence(mDevice, &fenceCreateInfo, nullptr, &fence);
         VK_ANB_DEBUG("no fences in pool, created %p", fence);
-        return fence;
     } else {
-        VkFence res = fencePool.back();
-        fencePool.pop_back();
-        vk->vkResetFences(device, 1, &res);
-        VK_ANB_DEBUG("existing fence in pool: %p. also reset the fence", res);
-        return res;
+        fence = mAvailableFences.back();
+        mAvailableFences.pop_back();
+        VkResult res = mVk->vkResetFences(mDevice, 1, &fence);
+        if (res != VK_SUCCESS) {
+            GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
+                << "Fail to reset Qsri VkFence: " << res << "(" << string_VkResult(res) << ").";
+        }
+        VK_ANB_DEBUG("existing fence in pool: %p. also reset the fence", fence);
     }
+    mUsedFences.emplace(fence);
+    VK_ANB_DEBUG("exit");
+    return fence;
 }
 
-AndroidNativeBufferInfo::QsriWaitInfo::~QsriWaitInfo() {
+AndroidNativeBufferInfo::QsriWaitFencePool::~QsriWaitFencePool() {
     VK_ANB_DEBUG("enter");
-    if (!vk) return;
-    if (!device) return;
     // Nothing in the fence pool is unsignaled
-    for (auto fence : fencePool) {
+    if (!mUsedFences.empty()) {
+        VK_ANB_ERR("%zu VkFences are still being used when destroying the Qsri fence pool.",
+                   mUsedFences.size());
+    }
+    for (auto fence : mAvailableFences) {
         VK_ANB_DEBUG("destroy fence %p", fence);
-        vk->vkDestroyFence(device, fence, nullptr);
+        mVk->vkDestroyFence(mDevice, fence, nullptr);
     }
     VK_ANB_DEBUG("exit");
 }
 
+void AndroidNativeBufferInfo::QsriWaitFencePool::returnFence(VkFence fence) {
+    AutoLock lock(mLock);
+    if (!mUsedFences.erase(fence)) {
+        GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
+            << "Return an unmanaged Qsri VkFence back to the pool.";
+        return;
+    }
+    mAvailableFences.push_back(fence);
+}
+
 bool parseAndroidNativeBufferInfo(
     const VkImageCreateInfo* pCreateInfo,
     AndroidNativeBufferInfo* info_out) {
@@ -309,6 +329,9 @@ VkResult prepareAndroidNativeBufferImage(
         }
     }
 
+    out->qsriWaitFencePool =
+        std::make_unique<AndroidNativeBufferInfo::QsriWaitFencePool>(out->vk, out->device);
+    out->qsriTimeline = std::make_unique<VkQsriTimeline>();
     return VK_SUCCESS;
 }
 
@@ -345,9 +368,7 @@ void teardownAndroidNativeBufferImage(
     anbInfo->mappedStagingPtr = nullptr;
     anbInfo->stagingMemory = VK_NULL_HANDLE;
 
-    AutoLock lock(anbInfo->qsriWaitInfo.lock);
-    anbInfo->qsriWaitInfo.presentCount = 0;
-    anbInfo->qsriWaitInfo.requestedPresentCount = 0;
+    anbInfo->qsriWaitFencePool = nullptr;
 }
 
 void getGralloc0Usage(VkFormat format, VkImageUsageFlags imageUsage,
@@ -599,12 +620,6 @@ VkResult syncImageToColorBuffer(
     std::shared_ptr<AndroidNativeBufferInfo> anbInfo) {
 
     auto anbInfoPtr = anbInfo.get();
-    {
-        AutoLock lock(anbInfo->qsriWaitInfo.lock);
-        VK_ANB_DEBUG_OBJ(anbInfoPtr, "ensure dispatch %p device %p", vk, anbInfo->device);
-        anbInfo->qsriWaitInfo.ensureDispatchAndDevice(vk, anbInfo->device);
-    }
-
     auto fb = FrameBuffer::getFB();
     fb->lock();
 
@@ -752,48 +767,44 @@ VkResult syncImageToColorBuffer(
     };
 
     // TODO(kaiyili): initiate ownership transfer to DisplayVk here.
-    VkFence qsriFence = VK_NULL_HANDLE;
-    {
-        VK_ANB_DEBUG_OBJ(anbInfoPtr, "trying to get qsri fence");
-        AutoLock lock(anbInfo->qsriWaitInfo.lock);
-        VK_ANB_DEBUG_OBJ(anbInfoPtr, "trying to get qsri fence (got lock)");
-        qsriFence = anbInfo->qsriWaitInfo.getFenceFromPoolLocked();
-        VK_ANB_DEBUG_OBJ(anbInfoPtr, "got qsri fence %p", qsriFence);
-    }
+    VkFence qsriFence = anbInfo->qsriWaitFencePool->getFenceFromPool();
     AutoLock qLock(*queueLock);
     vk->vkQueueSubmit(queueState.queue, 1, &submitInfo, qsriFence);
+    auto waitForQsriFenceTask = [anbInfoPtr, anbInfo, vk, device = anbInfo->device, qsriFence] {
+        (void)anbInfoPtr;
+        VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: enter");
+        VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...", qsriFence);
+        VkResult res = vk->vkWaitForFences(device, 1, &qsriFence, VK_FALSE, kTimeoutNs);
+        switch (res) {
+            case VK_SUCCESS:
+                break;
+            case VK_TIMEOUT:
+                VK_ANB_ERR("Timeout when waiting for the Qsri fence.");
+                break;
+            default:
+                GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
+                    << "Fail to wait for the Qsri VkFence: " << res << "(" << string_VkResult(res)
+                    << ").";
+        }
+        VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...(done)", qsriFence);
+        anbInfo->qsriWaitFencePool->returnFence(qsriFence);
+    };
     fb->unlock();
 
     if (anbInfo->useVulkanNativeImage) {
         VK_ANB_DEBUG_OBJ(anbInfoPtr, "using native image, so use sync thread to wait");
         fb->setColorBufferInUse(anbInfo->colorBufferHandle, false);
-        VkDevice device = anbInfo->device;
         // Queue wait to sync thread with completion callback
         // Pass anbInfo by value to get a ref
-        SyncThread::get()->triggerSignalVkPresentComplete([anbInfoPtr, anbInfo, vk, device,
-                                                           qsriFence] {
-            (void)anbInfoPtr;
-            VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: enter");
-            if (qsriFence) {
-                VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...", qsriFence);
-                vk->vkWaitForFences(device, 1, &qsriFence, VK_FALSE, kTimeoutNs);
-                VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: wait for fence %p...(done)", qsriFence);
-            }
-            AutoLock lock(anbInfo->qsriWaitInfo.lock);
-            VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: return fence and signal");
-            if (qsriFence) {
-                anbInfo->qsriWaitInfo.returnFenceLocked(qsriFence);
-            }
-            uint64_t presentCount = ++anbInfo->qsriWaitInfo.presentCount;
-            VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: done, present count is now %llu", (unsigned long long)presentCount);
-            anbInfo->qsriWaitInfo.cv.signal();
-            VK_ANB_DEBUG_OBJ(anbInfoPtr, "wait callback: exit");
-        });
+        SyncThread::get()->triggerGeneral(
+            [waitForQsriFenceTask = std::move(waitForQsriFenceTask), anbInfo]() mutable {
+                waitForQsriFenceTask();
+                anbInfo->qsriTimeline->signalNextPresentAndPoll();
+            },
+            "wait for the guest Qsri VkFence signaled");
     } else {
         VK_ANB_DEBUG_OBJ(anbInfoPtr, "not using native image, so wait right away");
-        if (qsriFence) {
-            vk->vkWaitForFences(anbInfo->device, 1, &qsriFence, VK_FALSE, kTimeoutNs);
-        }
+        waitForQsriFenceTask();
 
         VkMappedMemoryRange toInvalidate = {
             VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 0,
@@ -827,14 +838,7 @@ VkResult syncImageToColorBuffer(
                 colorBufferHandle,
                 anbInfo->mappedStagingPtr,
                 bpp * anbInfo->extent.width * anbInfo->extent.height);
-
-        AutoLock lock(anbInfo->qsriWaitInfo.lock);
-        uint64_t presentCount = ++anbInfo->qsriWaitInfo.presentCount;
-        VK_ANB_DEBUG_OBJ(anbInfoPtr, "done, present count is now %llu", (unsigned long long)presentCount);
-        anbInfo->qsriWaitInfo.cv.signal();
-        if (qsriFence) {
-            anbInfo->qsriWaitInfo.returnFenceLocked(qsriFence);
-        }
+        anbInfo->qsriTimeline->signalNextPresentAndPoll();
     }
 
     return VK_SUCCESS;
diff --git a/stream-servers/vulkan/VkAndroidNativeBuffer.h b/stream-servers/vulkan/VkAndroidNativeBuffer.h
index 8d0f17ee..7fb53996 100644
--- a/stream-servers/vulkan/VkAndroidNativeBuffer.h
+++ b/stream-servers/vulkan/VkAndroidNativeBuffer.h
@@ -13,19 +13,20 @@
 // limitations under the License.
 #pragma once
 
-#include "VkCommonOperations.h"
-
 #include <vulkan/vulkan.h>
 
-#include "base/Lock.h"
-#include "base/ConditionVariable.h"
-#include "cereal/common/goldfish_vk_private_defs.h"
-
 #include <atomic>
 #include <deque>
 #include <memory>
+#include <unordered_set>
 #include <vector>
 
+#include "VkCommonOperations.h"
+#include "VkQsriTimeline.h"
+#include "base/ConditionVariable.h"
+#include "base/Lock.h"
+#include "cereal/common/goldfish_vk_private_defs.h"
+
 namespace goldfish_vk {
 
 struct AndroidNativeBufferInfo;
@@ -123,40 +124,27 @@ struct AndroidNativeBufferInfo {
 
     // State that is of interest when interacting with sync fds and SyncThread.
     // Protected by this lock and condition variable.
-    struct QsriWaitInfo {
-        android::base::Lock lock;
-        android::base::ConditionVariable cv;
-
-        VulkanDispatch* vk = nullptr;
-        VkDevice device = VK_NULL_HANDLE;
-
-        // A pool of vkFences for waiting (optimization so we don't keep recreating them every time).
-        std::vector<VkFence> fencePool;
-
-        // How many times the image was presented via vkQueueSignalReleaseImageANDROID
-        // versus how many times we want it to be (for sync fd fence waiting).
-        // Incremented by waitQsri.
-        std::atomic_uint64_t requestedPresentCount = 0;
-        // Incremented by waitQsri if vkWaitForFences there succeeds,
-        // or by syncImageToColorBuffer in the non-zero-copy case.
-        std::atomic_uint64_t presentCount = 0;
-
-        void ensureDispatchAndDevice(VulkanDispatch* vkIn, VkDevice deviceIn) {
-            vk = vkIn;
-            device = deviceIn;
-        }
-
-        VkFence getFenceFromPoolLocked();
-
-        // requires fence to be signaled
-        void returnFenceLocked(VkFence fence) {
-            fencePool.push_back(fence);
-        }
-
-        ~QsriWaitInfo();
+    class QsriWaitFencePool {
+       public:
+        QsriWaitFencePool(VulkanDispatch*, VkDevice);
+        ~QsriWaitFencePool();
+        VkFence getFenceFromPool();
+        void returnFence(VkFence fence);
+
+       private:
+        android::base::Lock mLock;
+
+        VulkanDispatch* mVk;
+        VkDevice mDevice;
+
+        // A pool of vkFences for waiting (optimization so we don't keep recreating them every
+        // time).
+        std::vector<VkFence> mAvailableFences;
+        std::unordered_set<VkFence> mUsedFences;
     };
 
-    QsriWaitInfo qsriWaitInfo;
+    std::unique_ptr<QsriWaitFencePool> qsriWaitFencePool = nullptr;
+    std::unique_ptr<VkQsriTimeline> qsriTimeline = nullptr;
 };
 
 VkResult prepareAndroidNativeBufferImage(
diff --git a/stream-servers/vulkan/VkDecoderGlobalState.cpp b/stream-servers/vulkan/VkDecoderGlobalState.cpp
index f067c7da..3050effd 100644
--- a/stream-servers/vulkan/VkDecoderGlobalState.cpp
+++ b/stream-servers/vulkan/VkDecoderGlobalState.cpp
@@ -4740,9 +4740,7 @@ public:
         return vk->vkGetFenceStatus(device, fence);
     }
 
-    VkResult waitQsri(VkImage boxed_image, uint64_t timeout) {
-        (void)timeout; // TODO
-
+    VkResult registerQsriCallback(VkImage boxed_image, VkQsriTimeline::Callback callback) {
         AutoLock lock(mLock);
 
         VkImage image = unbox_VkImage(boxed_image);
@@ -4774,20 +4772,10 @@ public:
             return VK_SUCCESS;
         }
 
-        AutoLock qsriLock(anbInfo->qsriWaitInfo.lock);
-        uint64_t targetPresentCount = ++anbInfo->qsriWaitInfo.requestedPresentCount;
-
-        if (mLogging) {
-            fprintf(stderr, "%s:%p New target present count %llu\n",
-                    __func__, anbInfo.get(), (unsigned long long)targetPresentCount);
-        }
-
-        anbInfo->qsriWaitInfo.cv.wait(&anbInfo->qsriWaitInfo.lock, [anbInfo, targetPresentCount] {
-            return targetPresentCount <= anbInfo->qsriWaitInfo.presentCount;
-        });
+        anbInfo->qsriTimeline->registerCallbackForNextPresentAndPoll(std::move(callback));
 
         if (mLogging) {
-            fprintf(stderr, "%s:%p Done waiting\n", __func__, anbInfo.get());
+            fprintf(stderr, "%s:%p Done registering\n", __func__, anbInfo.get());
         }
         return VK_SUCCESS;
     }
@@ -7926,8 +7914,9 @@ VkResult VkDecoderGlobalState::getFenceStatus(VkFence boxed_fence) {
     return mImpl->getFenceStatus(boxed_fence);
 }
 
-VkResult VkDecoderGlobalState::waitQsri(VkImage image, uint64_t timeout) {
-    return mImpl->waitQsri(image, timeout);
+VkResult VkDecoderGlobalState::registerQsriCallback(VkImage image,
+                                                    VkQsriTimeline::Callback callback) {
+    return mImpl->registerQsriCallback(image, std::move(callback));
 }
 
 void VkDecoderGlobalState::deviceMemoryTransform_tohost(
diff --git a/stream-servers/vulkan/VkDecoderGlobalState.h b/stream-servers/vulkan/VkDecoderGlobalState.h
index 48491b7b..a32d4aa4 100644
--- a/stream-servers/vulkan/VkDecoderGlobalState.h
+++ b/stream-servers/vulkan/VkDecoderGlobalState.h
@@ -13,13 +13,13 @@
 // limitations under the License.
 #pragma once
 
-#include "VulkanHandleMapping.h"
-#include "VulkanDispatch.h"
-
 #include <vulkan/vulkan.h>
 
 #include <memory>
 
+#include "VkQsriTimeline.h"
+#include "VulkanDispatch.h"
+#include "VulkanHandleMapping.h"
 #include "cereal/common/goldfish_vk_private_defs.h"
 #include "cereal/common/goldfish_vk_transform.h"
 
@@ -791,7 +791,7 @@ public:
     // presented so far, so it ends up incrementing a "target present count"
     // for this image, and then waiting for the image to get vkQSRI'ed at least
     // that many times.
-    VkResult waitQsri(VkImage boxed_image, uint64_t timeout);
+    VkResult registerQsriCallback(VkImage boxed_image, VkQsriTimeline::Callback callback);
 
     // Transformations
     void deviceMemoryTransform_tohost(
diff --git a/stream-servers/vulkan/VkQsriTimeline.h b/stream-servers/vulkan/VkQsriTimeline.h
new file mode 100644
index 00000000..19e8ea0e
--- /dev/null
+++ b/stream-servers/vulkan/VkQsriTimeline.h
@@ -0,0 +1,65 @@
+#ifndef VK_QSRI_TIMELINE_H
+#define VK_QSRI_TIMELINE_H
+
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <mutex>
+#include <sstream>
+
+#include "host-common/logging.h"
+
+namespace goldfish_vk {
+class VkQsriTimeline {
+   public:
+    using Callback = std::function<void()>;
+
+    void signalNextPresentAndPoll() {
+        std::lock_guard<std::mutex> guard(mLock);
+        mPresentCount++;
+        pollLocked();
+    }
+
+    void registerCallbackForNextPresentAndPoll(Callback callback) {
+        std::lock_guard<std::mutex> guard(mLock);
+        uint64_t requestPresentCount = mRequestPresentCount;
+        mRequestPresentCount++;
+        mPendingCallbacks.emplace(requestPresentCount, std::move(callback));
+        pollLocked();
+    }
+
+    VkQsriTimeline() : mPresentCount(0), mRequestPresentCount(0) {}
+    ~VkQsriTimeline() {
+        std::lock_guard<std::mutex> guard(mLock);
+        if (mPendingCallbacks.empty()) {
+            return;
+        }
+        std::stringstream ss;
+        ss << mPendingCallbacks.size()
+           << " pending QSRI callbacks found when destroying the timeline, waiting for: ";
+        for (auto& [requiredPresentCount, callback] : mPendingCallbacks) {
+            callback();
+            ss << requiredPresentCount << ", ";
+        }
+        ss << "just call all of callbacks.";
+        ERR("%s", ss.str().c_str());
+    }
+
+   private:
+    std::map<uint64_t, Callback> mPendingCallbacks;
+    std::mutex mLock;
+    uint64_t mPresentCount;
+    uint64_t mRequestPresentCount;
+
+    void pollLocked() {
+        auto firstPendingCallback = mPendingCallbacks.lower_bound(mPresentCount);
+        for (auto readyCallback = mPendingCallbacks.begin(); readyCallback != firstPendingCallback;
+             readyCallback++) {
+            readyCallback->second();
+        }
+        mPendingCallbacks.erase(mPendingCallbacks.begin(), firstPendingCallback);
+    }
+};
+}  // namespace goldfish_vk
+
+#endif  // VK_QSRI_TIMELINE_H
+\ No newline at end of file
diff --git a/stream-servers/vulkan/VkQsriTimeline_unittest.cpp b/stream-servers/vulkan/VkQsriTimeline_unittest.cpp
new file mode 100644
index 00000000..d5d7af7c
--- /dev/null
+++ b/stream-servers/vulkan/VkQsriTimeline_unittest.cpp
@@ -0,0 +1,42 @@
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "VkQsriTimeline.h"
+
+namespace goldfish_vk {
+namespace {
+using ::testing::InSequence;
+using ::testing::MockFunction;
+
+TEST(VkQsriTImelineTest, signalFirstRegisterCallbackLater) {
+    MockFunction<void()> mockCallback1, mockCallback2;
+    VkQsriTimeline qsriTimeline;
+    {
+        InSequence s;
+        EXPECT_CALL(mockCallback1, Call()).Times(1);
+        EXPECT_CALL(mockCallback2, Call()).Times(1);
+    }
+    qsriTimeline.signalNextPresentAndPoll();
+    qsriTimeline.signalNextPresentAndPoll();
+    qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback1.AsStdFunction());
+    qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback2.AsStdFunction());
+}
+
+TEST(VkQsriTImelineTest, registerCallbackFirstSignalLater) {
+    MockFunction<void()> mockCallback1, mockCallback2, beforeSignal;
+    VkQsriTimeline qsriTimeline;
+    {
+        InSequence s;
+        EXPECT_CALL(beforeSignal, Call()).Times(1);
+        EXPECT_CALL(mockCallback1, Call()).Times(1);
+        EXPECT_CALL(mockCallback2, Call()).Times(1);
+    }
+    qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback1.AsStdFunction());
+    qsriTimeline.registerCallbackForNextPresentAndPoll(mockCallback2.AsStdFunction());
+    beforeSignal.Call();
+    qsriTimeline.signalNextPresentAndPoll();
+    qsriTimeline.signalNextPresentAndPoll();
+}
+
+}  // namespace
+}  // namespace goldfish_vk
+\ No newline at end of file
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2022-03-21 19:23:16 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2022-03-21 19:23:16 +0000
commit	a6bafe0649b982e0dd433e727630becf2775f116 (patch)
tree	fce235dfe4ce4260278b9eca913106bbb72cf3a5
parent	829d193568fa069689fcb42535c4d12e50b1e621 (diff)
parent	ea15f40672340474611490d751a8af0913dbe699 (diff)
download	vulkan-cereal-a6bafe0649b982e0dd433e727630becf2775f116.tar.gz