diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-03-21 23:41:35 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2024-03-21 23:41:35 +0000 |
commit | 4d6f34f32c7828ab935beb54cdca08bd7bbd39c3 (patch) | |
tree | 7aa355fd0b89ec0b2611e17ee84a14c6fa449e22 | |
parent | 7447ad8db319c34111a3d43b64b9485ac1a8115d (diff) | |
parent | 50ea3b267729413aa561eaea5aac7e6fcd565101 (diff) | |
download | libyuv-androidx-fragment-release.tar.gz |
Merge "Snap for 11610999 from 488a2af021e3e7473f083a9435b1472c0d411f3d to androidx-fragment-release" into androidx-fragment-releaseandroidx-fragment-release
-rw-r--r-- | .clang-format (renamed from files/.clang-format) | 0 | ||||
-rw-r--r-- | .gitignore (renamed from files/.gitignore) | 0 | ||||
-rw-r--r-- | .gn (renamed from files/.gn) | 4 | ||||
-rw-r--r-- | .vpython (renamed from files/.vpython) | 0 | ||||
-rw-r--r-- | .vpython3 (renamed from files/.vpython3) | 4 | ||||
-rw-r--r-- | AUTHORS (renamed from files/AUTHORS) | 0 | ||||
-rw-r--r-- | Android.bp | 179 | ||||
-rw-r--r-- | BUILD | 14 | ||||
-rw-r--r-- | BUILD.gn (renamed from files/BUILD.gn) | 77 | ||||
-rw-r--r-- | CM_linux_packages.cmake (renamed from files/CM_linux_packages.cmake) | 4 | ||||
-rw-r--r-- | CMakeLists.txt (renamed from files/CMakeLists.txt) | 26 | ||||
-rw-r--r-- | DEPS (renamed from files/DEPS) | 1114 | ||||
-rw-r--r-- | DIR_METADATA (renamed from files/DIR_METADATA) | 0 | ||||
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | METADATA | 23 | ||||
-rw-r--r-- | OWNERS | 15 | ||||
-rw-r--r-- | OWNERS.android | 1 | ||||
-rw-r--r-- | PATENTS (renamed from files/PATENTS) | 0 | ||||
-rw-r--r-- | PRESUBMIT.py (renamed from files/PRESUBMIT.py) | 0 | ||||
-rw-r--r-- | README.chromium (renamed from files/README.chromium) | 5 | ||||
-rw-r--r-- | README.md (renamed from files/README.md) | 1 | ||||
-rw-r--r-- | README.version | 8 | ||||
-rw-r--r-- | UPDATING | 36 | ||||
-rw-r--r-- | build_overrides/build.gni (renamed from files/build_overrides/build.gni) | 3 | ||||
-rw-r--r-- | build_overrides/gtest.gni (renamed from files/build_overrides/gtest.gni) | 0 | ||||
-rw-r--r-- | build_overrides/partition_alloc.gni | 17 | ||||
-rwxr-xr-x | cleanup_links.py (renamed from files/cleanup_links.py) | 0 | ||||
-rw-r--r-- | codereview.settings | 4 | ||||
-rw-r--r-- | docs/deprecated_builds.md (renamed from files/docs/deprecated_builds.md) | 4 | ||||
-rw-r--r-- | docs/environment_variables.md (renamed from files/docs/environment_variables.md) | 3 | ||||
-rw-r--r-- | docs/filtering.md (renamed from files/docs/filtering.md) | 0 | ||||
-rw-r--r-- | docs/formats.md (renamed from files/docs/formats.md) | 0 | ||||
-rw-r--r-- | docs/getting_started.md (renamed from files/docs/getting_started.md) | 47 | ||||
-rw-r--r-- | docs/rotation.md (renamed from files/docs/rotation.md) | 0 | ||||
-rw-r--r-- | download_vs_toolchain.py (renamed from files/download_vs_toolchain.py) | 0 | ||||
-rw-r--r-- | files/Android.bp | 196 | ||||
-rw-r--r-- | files/LICENSE | 29 | ||||
-rw-r--r-- | files/codereview.settings | 5 | ||||
-rw-r--r-- | files/public.mk | 13 | ||||
-rw-r--r-- | files/source/compare_mmi.cc | 123 | ||||
-rw-r--r-- | files/source/rotate_common.cc | 106 | ||||
-rw-r--r-- | files/source/rotate_mmi.cc | 291 | ||||
-rw-r--r-- | files/source/row_mmi.cc | 7842 | ||||
-rw-r--r-- | files/source/scale_mmi.cc | 1168 | ||||
-rwxr-xr-x | files/tools_libyuv/autoroller/roll_deps.py | 509 | ||||
-rw-r--r-- | include/libyuv.h (renamed from files/include/libyuv.h) | 0 | ||||
-rw-r--r-- | include/libyuv/basic_types.h (renamed from files/include/libyuv/basic_types.h) | 0 | ||||
-rw-r--r-- | include/libyuv/compare.h (renamed from files/include/libyuv/compare.h) | 0 | ||||
-rw-r--r-- | include/libyuv/compare_row.h (renamed from files/include/libyuv/compare_row.h) | 5 | ||||
-rw-r--r-- | include/libyuv/convert.h (renamed from files/include/libyuv/convert.h) | 123 | ||||
-rw-r--r-- | include/libyuv/convert_argb.h (renamed from files/include/libyuv/convert_argb.h) | 126 | ||||
-rw-r--r-- | include/libyuv/convert_from.h (renamed from files/include/libyuv/convert_from.h) | 0 | ||||
-rw-r--r-- | include/libyuv/convert_from_argb.h (renamed from files/include/libyuv/convert_from_argb.h) | 51 | ||||
-rw-r--r-- | include/libyuv/cpu_id.h (renamed from files/include/libyuv/cpu_id.h) | 30 | ||||
-rw-r--r-- | include/libyuv/loongson_intrinsics.h (renamed from files/include/libyuv/loongson_intrinsics.h) | 0 | ||||
-rw-r--r-- | include/libyuv/macros_msa.h (renamed from files/include/libyuv/macros_msa.h) | 0 | ||||
-rw-r--r-- | include/libyuv/mjpeg_decoder.h (renamed from files/include/libyuv/mjpeg_decoder.h) | 0 | ||||
-rw-r--r-- | include/libyuv/planar_functions.h (renamed from files/include/libyuv/planar_functions.h) | 70 | ||||
-rw-r--r-- | include/libyuv/rotate.h (renamed from files/include/libyuv/rotate.h) | 64 | ||||
-rw-r--r-- | include/libyuv/rotate_argb.h (renamed from files/include/libyuv/rotate_argb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/rotate_row.h (renamed from files/include/libyuv/rotate_row.h) | 50 | ||||
-rw-r--r-- | include/libyuv/row.h (renamed from files/include/libyuv/row.h) | 1148 | ||||
-rw-r--r-- | include/libyuv/scale.h (renamed from files/include/libyuv/scale.h) | 55 | ||||
-rw-r--r-- | include/libyuv/scale_argb.h (renamed from files/include/libyuv/scale_argb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/scale_rgb.h (renamed from files/include/libyuv/scale_rgb.h) | 0 | ||||
-rw-r--r-- | include/libyuv/scale_row.h (renamed from files/include/libyuv/scale_row.h) | 187 | ||||
-rw-r--r-- | include/libyuv/scale_uv.h (renamed from files/include/libyuv/scale_uv.h) | 0 | ||||
-rw-r--r-- | include/libyuv/version.h (renamed from files/include/libyuv/version.h) | 2 | ||||
-rw-r--r-- | include/libyuv/video_common.h (renamed from files/include/libyuv/video_common.h) | 0 | ||||
-rw-r--r-- | infra/config/OWNERS | 3 | ||||
-rw-r--r-- | infra/config/PRESUBMIT.py (renamed from files/infra/config/PRESUBMIT.py) | 2 | ||||
-rw-r--r-- | infra/config/README.md (renamed from files/infra/config/README.md) | 0 | ||||
-rw-r--r-- | infra/config/codereview.settings (renamed from files/infra/config/codereview.settings) | 0 | ||||
-rw-r--r-- | infra/config/commit-queue.cfg (renamed from files/infra/config/commit-queue.cfg) | 0 | ||||
-rw-r--r-- | infra/config/cr-buildbucket.cfg (renamed from files/infra/config/cr-buildbucket.cfg) | 401 | ||||
-rw-r--r-- | infra/config/luci-logdog.cfg (renamed from files/infra/config/luci-logdog.cfg) | 0 | ||||
-rw-r--r-- | infra/config/luci-milo.cfg (renamed from files/infra/config/luci-milo.cfg) | 0 | ||||
-rw-r--r-- | infra/config/luci-scheduler.cfg (renamed from files/infra/config/luci-scheduler.cfg) | 0 | ||||
-rwxr-xr-x | infra/config/main.star (renamed from files/infra/config/main.star) | 45 | ||||
-rw-r--r-- | infra/config/project.cfg (renamed from files/infra/config/project.cfg) | 2 | ||||
-rw-r--r-- | infra/config/realms.cfg (renamed from files/infra/config/realms.cfg) | 4 | ||||
-rw-r--r-- | libyuv.gni (renamed from files/libyuv.gni) | 8 | ||||
-rw-r--r-- | libyuv.gyp (renamed from files/libyuv.gyp) | 0 | ||||
-rw-r--r-- | libyuv.gypi (renamed from files/libyuv.gypi) | 0 | ||||
-rw-r--r-- | linux.mk (renamed from files/linux.mk) | 6 | ||||
-rw-r--r-- | public.mk | 2 | ||||
-rw-r--r-- | pylintrc (renamed from files/pylintrc) | 0 | ||||
-rwxr-xr-x | riscv_script/prepare_toolchain_qemu.sh | 74 | ||||
-rw-r--r-- | riscv_script/riscv-clang.cmake | 55 | ||||
-rwxr-xr-x | riscv_script/run_qemu.sh | 15 | ||||
-rw-r--r-- | source/compare.cc (renamed from files/source/compare.cc) | 6 | ||||
-rw-r--r-- | source/compare_common.cc (renamed from files/source/compare_common.cc) | 0 | ||||
-rw-r--r-- | source/compare_gcc.cc (renamed from files/source/compare_gcc.cc) | 2 | ||||
-rw-r--r-- | source/compare_msa.cc (renamed from files/source/compare_msa.cc) | 0 | ||||
-rw-r--r-- | source/compare_neon.cc (renamed from files/source/compare_neon.cc) | 0 | ||||
-rw-r--r-- | source/compare_neon64.cc (renamed from files/source/compare_neon64.cc) | 0 | ||||
-rw-r--r-- | source/compare_win.cc (renamed from files/source/compare_win.cc) | 0 | ||||
-rw-r--r-- | source/convert.cc (renamed from files/source/convert.cc) | 1018 | ||||
-rw-r--r-- | source/convert_argb.cc (renamed from files/source/convert_argb.cc) | 1639 | ||||
-rw-r--r-- | source/convert_from.cc (renamed from files/source/convert_from.cc) | 114 | ||||
-rw-r--r-- | source/convert_from_argb.cc (renamed from files/source/convert_from_argb.cc) | 1129 | ||||
-rw-r--r-- | source/convert_jpeg.cc (renamed from files/source/convert_jpeg.cc) | 0 | ||||
-rw-r--r-- | source/convert_to_argb.cc (renamed from files/source/convert_to_argb.cc) | 0 | ||||
-rw-r--r-- | source/convert_to_i420.cc (renamed from files/source/convert_to_i420.cc) | 0 | ||||
-rw-r--r-- | source/cpu_id.cc (renamed from files/source/cpu_id.cc) | 109 | ||||
-rw-r--r-- | source/mjpeg_decoder.cc (renamed from files/source/mjpeg_decoder.cc) | 4 | ||||
-rw-r--r-- | source/mjpeg_validate.cc (renamed from files/source/mjpeg_validate.cc) | 0 | ||||
-rw-r--r-- | source/planar_functions.cc (renamed from files/source/planar_functions.cc) | 751 | ||||
-rw-r--r-- | source/rotate.cc (renamed from files/source/rotate.cc) | 437 | ||||
-rw-r--r-- | source/rotate_any.cc (renamed from files/source/rotate_any.cc) | 0 | ||||
-rw-r--r-- | source/rotate_argb.cc (renamed from files/source/rotate_argb.cc) | 25 | ||||
-rw-r--r-- | source/rotate_common.cc | 198 | ||||
-rw-r--r-- | source/rotate_gcc.cc (renamed from files/source/rotate_gcc.cc) | 130 | ||||
-rw-r--r-- | source/rotate_lsx.cc (renamed from files/source/rotate_lsx.cc) | 0 | ||||
-rw-r--r-- | source/rotate_msa.cc (renamed from files/source/rotate_msa.cc) | 0 | ||||
-rw-r--r-- | source/rotate_neon.cc (renamed from files/source/rotate_neon.cc) | 40 | ||||
-rw-r--r-- | source/rotate_neon64.cc (renamed from files/source/rotate_neon64.cc) | 71 | ||||
-rw-r--r-- | source/rotate_win.cc (renamed from files/source/rotate_win.cc) | 0 | ||||
-rw-r--r-- | source/row_any.cc (renamed from files/source/row_any.cc) | 852 | ||||
-rw-r--r-- | source/row_common.cc (renamed from files/source/row_common.cc) | 915 | ||||
-rw-r--r-- | source/row_gcc.cc (renamed from files/source/row_gcc.cc) | 679 | ||||
-rw-r--r-- | source/row_lasx.cc (renamed from files/source/row_lasx.cc) | 406 | ||||
-rw-r--r-- | source/row_lsx.cc (renamed from files/source/row_lsx.cc) | 1534 | ||||
-rw-r--r-- | source/row_msa.cc (renamed from files/source/row_msa.cc) | 0 | ||||
-rw-r--r-- | source/row_neon.cc (renamed from files/source/row_neon.cc) | 370 | ||||
-rw-r--r-- | source/row_neon64.cc (renamed from files/source/row_neon64.cc) | 468 | ||||
-rw-r--r-- | source/row_rvv.cc | 1394 | ||||
-rw-r--r-- | source/row_win.cc (renamed from files/source/row_win.cc) | 65 | ||||
-rw-r--r-- | source/scale.cc (renamed from files/source/scale.cc) | 716 | ||||
-rw-r--r-- | source/scale_any.cc (renamed from files/source/scale_any.cc) | 16 | ||||
-rw-r--r-- | source/scale_argb.cc (renamed from files/source/scale_argb.cc) | 330 | ||||
-rw-r--r-- | source/scale_common.cc (renamed from files/source/scale_common.cc) | 220 | ||||
-rw-r--r-- | source/scale_gcc.cc (renamed from files/source/scale_gcc.cc) | 5 | ||||
-rw-r--r-- | source/scale_lsx.cc (renamed from files/source/scale_lsx.cc) | 0 | ||||
-rw-r--r-- | source/scale_msa.cc (renamed from files/source/scale_msa.cc) | 0 | ||||
-rw-r--r-- | source/scale_neon.cc (renamed from files/source/scale_neon.cc) | 39 | ||||
-rw-r--r-- | source/scale_neon64.cc (renamed from files/source/scale_neon64.cc) | 134 | ||||
-rw-r--r-- | source/scale_rgb.cc (renamed from files/source/scale_rgb.cc) | 0 | ||||
-rw-r--r-- | source/scale_rvv.cc | 1040 | ||||
-rw-r--r-- | source/scale_uv.cc (renamed from files/source/scale_uv.cc) | 391 | ||||
-rw-r--r-- | source/scale_win.cc (renamed from files/source/scale_win.cc) | 0 | ||||
-rwxr-xr-x | source/test.sh (renamed from files/source/test.sh) | 0 | ||||
-rw-r--r-- | source/video_common.cc (renamed from files/source/video_common.cc) | 0 | ||||
-rw-r--r-- | tools_libyuv/OWNERS | 4 | ||||
-rwxr-xr-x | tools_libyuv/autoroller/roll_deps.py | 822 | ||||
-rwxr-xr-x | tools_libyuv/autoroller/unittests/roll_deps_test.py (renamed from files/tools_libyuv/autoroller/unittests/roll_deps_test.py) | 0 | ||||
-rw-r--r-- | tools_libyuv/autoroller/unittests/testdata/DEPS (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS) | 0 | ||||
-rw-r--r-- | tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new) | 0 | ||||
-rw-r--r-- | tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old) | 0 | ||||
-rwxr-xr-x | tools_libyuv/get_landmines.py (renamed from files/tools_libyuv/get_landmines.py) | 0 | ||||
-rw-r--r-- | tools_libyuv/msan/OWNERS | 3 | ||||
-rw-r--r-- | tools_libyuv/msan/blacklist.txt (renamed from files/tools_libyuv/msan/blacklist.txt) | 0 | ||||
-rw-r--r-- | tools_libyuv/ubsan/OWNERS | 3 | ||||
-rw-r--r-- | tools_libyuv/ubsan/blacklist.txt (renamed from files/tools_libyuv/ubsan/blacklist.txt) | 0 | ||||
-rw-r--r-- | tools_libyuv/ubsan/vptr_blacklist.txt (renamed from files/tools_libyuv/ubsan/vptr_blacklist.txt) | 0 | ||||
-rw-r--r-- | unit_test/basictypes_test.cc (renamed from files/unit_test/basictypes_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/color_test.cc (renamed from files/unit_test/color_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/compare_test.cc (renamed from files/unit_test/compare_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/convert_argb_test.cc (renamed from files/unit_test/convert_test.cc) | 2625 | ||||
-rw-r--r-- | unit_test/convert_test.cc | 2110 | ||||
-rw-r--r-- | unit_test/cpu_test.cc (renamed from files/unit_test/cpu_test.cc) | 157 | ||||
-rw-r--r-- | unit_test/cpu_thread_test.cc (renamed from files/unit_test/cpu_thread_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/math_test.cc (renamed from files/unit_test/math_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/planar_test.cc (renamed from files/unit_test/planar_test.cc) | 305 | ||||
-rw-r--r-- | unit_test/rotate_argb_test.cc (renamed from files/unit_test/rotate_argb_test.cc) | 106 | ||||
-rw-r--r-- | unit_test/rotate_test.cc (renamed from files/unit_test/rotate_test.cc) | 363 | ||||
-rw-r--r-- | unit_test/scale_argb_test.cc (renamed from files/unit_test/scale_argb_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/scale_plane_test.cc | 470 | ||||
-rw-r--r-- | unit_test/scale_rgb_test.cc (renamed from files/unit_test/scale_rgb_test.cc) | 0 | ||||
-rw-r--r-- | unit_test/scale_test.cc (renamed from files/unit_test/scale_test.cc) | 478 | ||||
-rw-r--r-- | unit_test/scale_uv_test.cc (renamed from files/unit_test/scale_uv_test.cc) | 79 | ||||
-rw-r--r-- | unit_test/testdata/arm_v7.txt (renamed from files/unit_test/testdata/arm_v7.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/juno.txt (renamed from files/unit_test/testdata/juno.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips.txt (renamed from files/unit_test/testdata/mips.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_loongson2k.txt (renamed from files/unit_test/testdata/mips_loongson2k.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_loongson3.txt (renamed from files/unit_test/testdata/mips_loongson3.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_loongson_mmi.txt (renamed from files/unit_test/testdata/mips_loongson_mmi.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/mips_msa.txt (renamed from files/unit_test/testdata/mips_msa.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/riscv64.txt | 4 | ||||
-rw-r--r-- | unit_test/testdata/riscv64_rvv.txt | 4 | ||||
-rw-r--r-- | unit_test/testdata/riscv64_rvv_zvfh.txt | 4 | ||||
-rw-r--r-- | unit_test/testdata/tegra3.txt (renamed from files/unit_test/testdata/tegra3.txt) | 0 | ||||
-rw-r--r-- | unit_test/testdata/test0.jpg (renamed from files/unit_test/testdata/test0.jpg) | bin | 421 -> 421 bytes | |||
-rw-r--r-- | unit_test/testdata/test1.jpg (renamed from files/unit_test/testdata/test1.jpg) | bin | 735 -> 735 bytes | |||
-rw-r--r-- | unit_test/testdata/test2.jpg (renamed from files/unit_test/testdata/test2.jpg) | bin | 685 -> 685 bytes | |||
-rw-r--r-- | unit_test/testdata/test3.jpg (renamed from files/unit_test/testdata/test3.jpg) | bin | 704 -> 704 bytes | |||
-rw-r--r-- | unit_test/testdata/test4.jpg (renamed from files/unit_test/testdata/test4.jpg) | bin | 701 -> 701 bytes | |||
-rw-r--r-- | unit_test/unit_test.cc (renamed from files/unit_test/unit_test.cc) | 16 | ||||
-rw-r--r-- | unit_test/unit_test.h (renamed from files/unit_test/unit_test.h) | 15 | ||||
-rw-r--r-- | unit_test/video_common_test.cc (renamed from files/unit_test/video_common_test.cc) | 0 | ||||
-rw-r--r-- | util/Makefile (renamed from files/util/Makefile) | 0 | ||||
-rw-r--r-- | util/color.cc (renamed from files/util/color.cc) | 0 | ||||
-rw-r--r-- | util/compare.cc (renamed from files/util/compare.cc) | 0 | ||||
-rw-r--r-- | util/cpuid.c (renamed from files/util/cpuid.c) | 66 | ||||
-rw-r--r-- | util/i444tonv12_eg.cc (renamed from files/util/i444tonv12_eg.cc) | 0 | ||||
-rw-r--r-- | util/psnr.cc (renamed from files/util/psnr.cc) | 0 | ||||
-rw-r--r-- | util/psnr.h (renamed from files/util/psnr.h) | 0 | ||||
-rw-r--r-- | util/psnr_main.cc (renamed from files/util/psnr_main.cc) | 0 | ||||
-rw-r--r-- | util/ssim.cc (renamed from files/util/ssim.cc) | 0 | ||||
-rw-r--r-- | util/ssim.h (renamed from files/util/ssim.h) | 0 | ||||
-rw-r--r-- | util/yuvconstants.c (renamed from files/util/yuvconstants.c) | 11 | ||||
-rw-r--r-- | util/yuvconvert.cc (renamed from files/util/yuvconvert.cc) | 10 | ||||
-rw-r--r-- | winarm.mk (renamed from files/winarm.mk) | 0 |
203 files changed, 20413 insertions, 16890 deletions
diff --git a/files/.clang-format b/.clang-format index 59d48705..59d48705 100644 --- a/files/.clang-format +++ b/.clang-format diff --git a/files/.gitignore b/.gitignore index 20d679b7..20d679b7 100644 --- a/files/.gitignore +++ b/.gitignore @@ -34,7 +34,5 @@ exec_script_whitelist = build_dotfile_settings.exec_script_whitelist + default_args = { mac_sdk_min = "10.12" - - # https://bugs.chromium.org/p/libyuv/issues/detail?id=826 - ios_deployment_target = "10.0" + ios_deployment_target = "12.0" } diff --git a/files/.vpython b/.vpython index 4a64fd21..4a64fd21 100644 --- a/files/.vpython +++ b/.vpython diff --git a/files/.vpython3 b/.vpython3 index 0a9aa38b..28d819e7 100644 --- a/files/.vpython3 +++ b/.vpython3 @@ -76,8 +76,8 @@ wheel: < version: "version:5.8.0.chromium.2" > wheel: < - name: "infra/python/wheels/requests-py2_py3" - version: "version:2.26.0" + name: "infra/python/wheels/requests-py3" + version: "version:2.31.0" > # Used by various python unit tests. @@ -1,7 +1,6 @@ package { default_applicable_licenses: ["external_libyuv_license"], } - // Added automatically by a large-scale-change // See: http://go/android-license-faq license { @@ -12,7 +11,183 @@ license { ], license_text: [ "LICENSE", + "PATENTS", ], } - subdirs = ["files"] + +cc_library { + name: "libyuv", + vendor_available: true, + product_available: true, + host_supported: true, + + srcs: [ + "source/compare.cc", + "source/compare_common.cc", + "source/compare_gcc.cc", + "source/compare_msa.cc", + "source/compare_neon.cc", + "source/compare_neon64.cc", + "source/convert.cc", + "source/convert_argb.cc", + "source/convert_from.cc", + "source/convert_from_argb.cc", + "source/convert_jpeg.cc", + "source/convert_to_argb.cc", + "source/convert_to_i420.cc", + "source/cpu_id.cc", + "source/mjpeg_decoder.cc", + "source/mjpeg_validate.cc", + "source/planar_functions.cc", + "source/rotate.cc", + "source/rotate_any.cc", + "source/rotate_argb.cc", + "source/rotate_common.cc", + "source/rotate_gcc.cc", + 
"source/rotate_msa.cc", + "source/rotate_neon.cc", + "source/rotate_neon64.cc", + "source/row_any.cc", + "source/row_common.cc", + "source/row_gcc.cc", + "source/row_msa.cc", + "source/row_neon.cc", + "source/row_neon64.cc", + "source/row_rvv.cc", + "source/scale.cc", + "source/scale_any.cc", + "source/scale_argb.cc", + "source/scale_common.cc", + "source/scale_gcc.cc", + "source/scale_msa.cc", + "source/scale_neon.cc", + "source/scale_neon64.cc", + "source/scale_rgb.cc", + "source/scale_rvv.cc", + "source/scale_uv.cc", + "source/video_common.cc", + ], + + cflags: [ + "-Wall", + "-Werror", + "-Wno-unused-parameter", + "-fexceptions", + "-DHAVE_JPEG", + "-DLIBYUV_UNLIMITED_DATA", + ], + + arch: { + arm: { + cflags: ["-mfpu=neon"], + }, + }, + + shared_libs: ["libjpeg"], + + export_include_dirs: ["include"], + + apex_available: [ + "//apex_available:platform", + "com.android.media.swcodec", + "com.android.virt", + ], + min_sdk_version: "29", +} + +// compatibilty static library until all uses of libyuv_static are replaced +// with libyuv (b/37646797) +cc_library_static { + name: "libyuv_static", + vendor_available: true, + whole_static_libs: ["libyuv"], + apex_available: [ + "//apex_available:platform", + "com.android.media.swcodec", + ], + min_sdk_version: "29", +} + +cc_test { + name: "libyuv_unittest", + static_libs: ["libyuv"], + shared_libs: ["libjpeg"], + cflags: ["-Wall", "-Werror"], + srcs: [ + "unit_test/basictypes_test.cc", + "unit_test/color_test.cc", + "unit_test/compare_test.cc", + "unit_test/convert_test.cc", + "unit_test/cpu_test.cc", + "unit_test/cpu_thread_test.cc", + "unit_test/math_test.cc", + "unit_test/planar_test.cc", + "unit_test/rotate_argb_test.cc", + "unit_test/rotate_test.cc", + "unit_test/scale_argb_test.cc", + "unit_test/scale_plane_test.cc", + "unit_test/scale_rgb_test.cc", + "unit_test/scale_test.cc", + "unit_test/scale_uv_test.cc", + "unit_test/unit_test.cc", + "unit_test/video_common_test.cc", + ], +} + +cc_test { + name: "compare", + 
gtest: false, + srcs: [ + "util/compare.cc", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "cpuid", + gtest: false, + srcs: [ + "util/cpuid.c", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "i444tonv12_eg", + gtest: false, + srcs: [ + "util/i444tonv12_eg.cc", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "psnr", + gtest: false, + srcs: [ + "util/psnr_main.cc", + "util/psnr.cc", + "util/ssim.cc", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "yuvconstants", + gtest: false, + srcs: [ + "util/yuvconstants.c", + ], + static_libs: ["libyuv"], +} + +cc_test { + name: "yuvconvert", + gtest: false, + srcs: [ + "util/yuvconvert.cc", + ], + static_libs: ["libyuv"], + shared_libs: ["libjpeg"], +} @@ -1,14 +0,0 @@ -# Copyright 2011 Google Inc. All Rights Reserved. -# -# Description: -# The libyuv package provides implementation yuv image conversion and -# scaling. -# -# This library is used by Talk Video and WebRTC. -# - -licenses(['notice']) # 3-clause BSD - -exports_files(['LICENSE']) - -package(default_visibility = ['//visibility:public']) diff --git a/files/BUILD.gn b/BUILD.gn index a72ff065..2c600b22 100644 --- a/files/BUILD.gn +++ b/BUILD.gn @@ -6,6 +6,7 @@ # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. 
+import("//build/config/features.gni") import("//testing/test.gni") import("libyuv.gni") @@ -21,15 +22,25 @@ declare_args() { config("libyuv_config") { include_dirs = [ "include" ] - if (is_android && current_cpu == "arm64") { - ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ] - } - if (is_android && current_cpu != "arm64") { - ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] + if (is_android) { + if (target_cpu == "arm" || target_cpu == "x86" || target_cpu == "mipsel") { + ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] + } else { + ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ] + } } - + defines = [] if (!libyuv_use_neon) { - defines = [ "LIBYUV_DISABLE_NEON" ] + defines += [ "LIBYUV_DISABLE_NEON" ] + } + if (libyuv_disable_rvv) { + defines += [ "LIBYUV_DISABLE_RVV" ] + } + if (!libyuv_use_lsx) { + defines += [ "LIBYUV_DISABLE_LSX" ] + } + if (!libyuv_use_lasx) { + defines += [ "LIBYUV_DISABLE_LASX" ] } } @@ -69,6 +80,14 @@ group("libyuv") { deps += [ ":libyuv_msa" ] } + if (libyuv_use_lsx) { + deps += [ ":libyuv_lsx" ] + } + + if (libyuv_use_lasx) { + deps += [ ":libyuv_lasx" ] + } + if (!is_ios && !libyuv_disable_jpeg) { # Make sure that clients of libyuv link with libjpeg. 
This can't go in # libyuv_internal because in Windows x64 builds that will generate a clang @@ -129,6 +148,7 @@ static_library("libyuv_internal") { "source/row_any.cc", "source/row_common.cc", "source/row_gcc.cc", + "source/row_rvv.cc", "source/row_win.cc", "source/scale.cc", "source/scale_any.cc", @@ -136,6 +156,7 @@ static_library("libyuv_internal") { "source/scale_common.cc", "source/scale_gcc.cc", "source/scale_rgb.cc", + "source/scale_rvv.cc", "source/scale_uv.cc", "source/scale_win.cc", "source/video_common.cc", @@ -150,7 +171,7 @@ static_library("libyuv_internal") { configs += [ "//build/config/gcc:symbol_visibility_default" ] } - if (!is_ios && !libyuv_disable_jpeg) { + if ((!is_ios || use_blink) && !libyuv_disable_jpeg) { defines += [ "HAVE_JPEG" ] # Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps @@ -229,6 +250,44 @@ if (libyuv_use_msa) { } } +if (libyuv_use_lsx) { + static_library("libyuv_lsx") { + sources = [ + # LSX Source Files + "source/rotate_lsx.cc", + "source/row_lsx.cc", + "source/scale_lsx.cc", + ] + + cflags_cc = [ + "-mlsx", + "-Wno-c++11-narrowing", + ] + + deps = [ ":libyuv_internal" ] + + public_configs = [ ":libyuv_config" ] + } +} + +if (libyuv_use_lasx) { + static_library("libyuv_lasx") { + sources = [ + # LASX Source Files + "source/row_lasx.cc", + ] + + cflags_cc = [ + "-mlasx", + "-Wno-c++11-narrowing", + ] + + deps = [ ":libyuv_internal" ] + + public_configs = [ ":libyuv_config" ] + } +} + if (libyuv_include_tests) { config("libyuv_unittest_warnings_config") { if (!is_win) { @@ -256,6 +315,7 @@ if (libyuv_include_tests) { "unit_test/basictypes_test.cc", "unit_test/color_test.cc", "unit_test/compare_test.cc", + "unit_test/convert_argb_test.cc", "unit_test/convert_test.cc", "unit_test/cpu_test.cc", "unit_test/cpu_thread_test.cc", @@ -264,6 +324,7 @@ if (libyuv_include_tests) { "unit_test/rotate_argb_test.cc", "unit_test/rotate_test.cc", "unit_test/scale_argb_test.cc", + "unit_test/scale_plane_test.cc", 
"unit_test/scale_rgb_test.cc", "unit_test/scale_test.cc", "unit_test/scale_uv_test.cc", diff --git a/files/CM_linux_packages.cmake b/CM_linux_packages.cmake index 5f676f89..a073edfa 100644 --- a/files/CM_linux_packages.cmake +++ b/CM_linux_packages.cmake @@ -8,7 +8,7 @@ SET ( YUV_VER_MAJOR 0 ) SET ( YUV_VER_MINOR 0 ) SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} ) SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} ) -MESSAGE ( "Building ver.: ${YUV_VERSION}" ) +MESSAGE ( VERBOSE "Building ver.: ${YUV_VERSION}" ) # is this a 32-bit or 64-bit build? IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 ) @@ -45,7 +45,7 @@ ELSE () SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" ) ENDIF () ENDIF () -MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" ) +MESSAGE ( VERBOSE "Packaging for: ${YUV_SYSTEM_NAME}" ) # define all the variables needed by CPack to create .deb and .rpm packages SET ( CPACK_PACKAGE_VENDOR "Frank Barchard" ) diff --git a/files/CMakeLists.txt b/CMakeLists.txt index d190507b..9abfa74b 100644 --- a/files/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ PROJECT ( YUV C CXX ) # "C" is required even for C++ projects CMAKE_MINIMUM_REQUIRED( VERSION 2.8.12 ) -OPTION( TEST "Built unit tests" OFF ) +OPTION( UNIT_TEST "Built unit tests" OFF ) SET ( ly_base_dir ${PROJECT_SOURCE_DIR} ) SET ( ly_src_dir ${ly_base_dir}/source ) @@ -37,22 +37,32 @@ if(WIN32) SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" ) endif() +# this creates the cpuid tool +ADD_EXECUTABLE ( cpuid ${ly_base_dir}/util/cpuid.c ) +TARGET_LINK_LIBRARIES ( cpuid ${ly_lib_static} ) + # this creates the conversion tool ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc ) TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} ) +# this creates the yuvconstants tool +ADD_EXECUTABLE ( yuvconstants ${ly_base_dir}/util/yuvconstants.c ) +TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} ) -INCLUDE ( FindJPEG ) +find_package ( JPEG ) if (JPEG_FOUND) include_directories( 
${JPEG_INCLUDE_DIR} ) - target_link_libraries( yuvconvert ${JPEG_LIBRARY} ) + target_link_libraries( ${ly_lib_shared} ${JPEG_LIBRARY} ) add_definitions( -DHAVE_JPEG ) endif() -if(TEST) +if(UNIT_TEST) find_library(GTEST_LIBRARY gtest) if(GTEST_LIBRARY STREQUAL "GTEST_LIBRARY-NOTFOUND") set(GTEST_SRC_DIR /usr/src/gtest CACHE STRING "Location of gtest sources") + if (CMAKE_CROSSCOMPILING) + set(GTEST_SRC_DIR third_party/googletest/src/googletest) + endif() if(EXISTS ${GTEST_SRC_DIR}/src/gtest-all.cc) message(STATUS "building gtest from sources in ${GTEST_SRC_DIR}") set(gtest_sources ${GTEST_SRC_DIR}/src/gtest-all.cc) @@ -61,7 +71,7 @@ if(TEST) include_directories(${GTEST_SRC_DIR}/include) set(GTEST_LIBRARY gtest) else() - message(FATAL_ERROR "TEST is set but unable to find gtest library") + message(FATAL_ERROR "UNIT_TEST is set but unable to find gtest library") endif() endif() @@ -78,6 +88,12 @@ if(TEST) if(NACL AND NACL_LIBC STREQUAL "newlib") target_link_libraries(libyuv_unittest glibc-compat) endif() + + find_library(GFLAGS_LIBRARY gflags) + if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND") + target_link_libraries(libyuv_unittest gflags) + add_definitions(-DLIBYUV_USE_GFLAGS) + endif() endif() @@ -5,43 +5,62 @@ gclient_gn_args = [ vars = { 'chromium_git': 'https://chromium.googlesource.com', - 'chromium_revision': '829c6df33dce1085a61d8fd44209fc84bbf9a6a7', - 'gn_version': 'git_revision:6f13aaac55a977e1948910942675c69f2b4f7a94', + 'chromium_revision': 'af3d01376bec75a68f90160bfd38057d60510a2b', + 'gn_version': 'git_revision:fae280eabe5d31accc53100137459ece19a7a295', + # ninja CIPD package version. + # https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja + 'ninja_version': 'version:2@1.11.1.chromium.6', + # reclient CIPD package version + 'reclient_version': 're_client_version:0.110.0.43ec6b1-gomaip', # Keep the Chromium default of generating location tags. 
'generate_location_tags': True, + + # By default, download the fuchsia sdk from the public sdk directory. + 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/', + 'fuchsia_version': 'version:15.20230909.2.1', + # By default, download the fuchsia images from the fuchsia GCS bucket. + 'fuchsia_images_bucket': 'fuchsia', + 'checkout_fuchsia': False, + # Since the images are hundreds of MB, default to only downloading the image + # most commonly useful for developers. Bots and developers that need to use + # other images can override this with additional images. + 'checkout_fuchsia_boot_images': "terminal.qemu-x64,terminal.x64", + 'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""', } deps = { 'src/build': - Var('chromium_git') + '/chromium/src/build' + '@' + 'dcea3443035f48d58193788e0bc56daca4e5db33', + Var('chromium_git') + '/chromium/src/build' + '@' + '5885d3c24833ad72845a52a1b913a2b8bc651b56', 'src/buildtools': - Var('chromium_git') + '/chromium/src/buildtools' + '@' + '075dd7e22837a69189003e4fa84499acf63188cf', + Var('chromium_git') + '/chromium/src/buildtools' + '@' + '79ab87fa54614258c4c95891e873223371194525', 'src/testing': - Var('chromium_git') + '/chromium/src/testing' + '@' + 'f4e42be13265ec304b0f3085eee2b15f30f44077', + Var('chromium_git') + '/chromium/src/testing' + '@' + '51e9a02297057cc0e917763a51e16680b7d16fb6', 'src/third_party': - Var('chromium_git') + '/chromium/src/third_party' + '@' + '42c249feeb71bc0cd184849f0509aefef599343d', + Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667', 'src/buildtools/linux64': { 'packages': [ { - 'package': 'gn/gn/linux-amd64', + 'package': 'gn/gn/linux-${{arch}}', 'version': Var('gn_version'), } ], 'dep_type': 'cipd', - 'condition': 'checkout_linux', + 'condition': 'host_os == "linux"', }, + 'src/buildtools/mac': { 'packages': [ { - 'package': 'gn/gn/mac-amd64', + 'package': 'gn/gn/mac-${{arch}}', 'version': Var('gn_version'), } ], 'dep_type': 
'cipd', - 'condition': 'checkout_mac', + 'condition': 'host_os == "mac"', }, + 'src/buildtools/win': { 'packages': [ { @@ -50,43 +69,60 @@ deps = { } ], 'dep_type': 'cipd', - 'condition': 'checkout_win', + 'condition': 'host_os == "win"', }, - 'src/buildtools/clang_format/script': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + '99876cacf78329e5f99c244dbe42ccd1654517a0', - 'src/buildtools/third_party/libc++/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '79a2e924d96e2fc1e4b937c42efd08898fa472d7', - 'src/buildtools/third_party/libc++abi/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '665b74f7d1b3bb295cd6ba7d8fcec1acd3d2ac84', - 'src/buildtools/third_party/libunwind/trunk': - Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f51a154281bdfe746c46c07cd4fb05be97f9441d', + 'src/buildtools/reclient': { + 'packages': [ + { + 'package': 'infra/rbe/client/${{platform}}', + 'version': Var('reclient_version'), + } + ], + 'dep_type': 'cipd', + }, 'src/third_party/catapult': - Var('chromium_git') + '/catapult.git' + '@' + '75423c310eb303d28978be892fcf7b9c2c824909', + Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d', + 'src/third_party/clang-format/script': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef', 'src/third_party/colorama/src': - Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8', + Var('chromium_git') + '/external/colorama.git' + '@' + '3de9f013df4b470069d03d250224062e8cf15c49', + 'src/third_party/cpu_features/src': { + 'url': Var('chromium_git') + '/external/github.com/google/cpu_features.git' + '@' + '936b9ab5515dead115606559502e3864958f7f6e', + 'condition': 'checkout_android', + }, 
'src/third_party/depot_tools': - Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '2ffa1bde797a8127c0f72908d0bd74051fd65d0d', + Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'd3e43dd4319ba169c0aaf44547eecf861f2fe5da', 'src/third_party/freetype/src': - Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'cff026d41599945498044d2f4dcc0e610ffb6929', + Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '9e3c5d7e183c1a8d5ed8868d7d28ef18d3ec9ec8', + 'third_party/fuchsia-gn-sdk': { + 'url': Var('chromium_git') + '/chromium/src/third_party/fuchsia-gn-sdk.git' + '@' + '0d6902558d92fe3d49ba9a8f638ddea829be595b', + 'condition': 'checkout_fuchsia', + }, 'src/third_party/googletest/src': - Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'e2f3978937c0244508135f126e2617a7734a68be', + Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07', 'src/third_party/harfbuzz-ng/src': - Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '64b29dbd5994a511acee69cb9b45ad650ef88359', + Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + 'db700b5670d9475cc8ed4880cc9447b232c5e432', + 'src/third_party/libc++/src': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80', + 'src/third_party/libc++abi/src': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8d21803b9076b16d46c32e2f10da191ee758520c', + 'src/third_party/libunwind/src': + Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f1c687e0aaf0d70b9a53a150e9be5cb63af9215f', 'src/third_party/libjpeg_turbo': - Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '02959c3ee17abacfd1339ec22ea93301292ffd56', + Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 
'30bdb85e302ecfc52593636b2f44af438e05e784', 'src/third_party/nasm': - Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '9215e8e1d0fe474ffd3e16c1a07a0f97089e6224', + Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '7fc833e889d1afda72c06220e5bed8fb43b2e5ce', 'src/tools': - Var('chromium_git') + '/chromium/src/tools' + '@' + '198dc879529652b39ba6e223bcc0bcad5f1facd6', + Var('chromium_git') + '/chromium/src/tools' + '@' + 'a76c0dbb64c603a0d45e0c6dfae3a351b6e1adf1', # libyuv-only dependencies (not present in Chromium). 'src/third_party/gtest-parallel': Var('chromium_git') + '/external/webrtc/deps/third_party/gtest-parallel' + '@' + '1dad0e9f6d82ff994130b529d7d814b40eb32b0e', 'src/third_party/lss': { - 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + '92a65a8f5d705d1928874420c8d0d15bde8c89e5', + 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + 'ce877209e11aa69dcfffbd53ef90ea1d07136521', 'condition': 'checkout_android or checkout_linux', }, @@ -101,14 +137,32 @@ deps = { 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/auto/src': { - 'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + 'fe67d853d6356943dc79541c892ab6d3e6a7b61a', - 'condition': 'checkout_android', + + 'src/third_party/kotlin_stdlib': { + 'packages': [ + { + 'package': 'chromium/third_party/kotlin_stdlib', + 'version': 'Z1gsqhL967kFQecxKrRwXHbl-vwQjpv0l7PMUZ0EVO8C', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, + 'src/third_party/kotlinc/current': { + 'packages': [ + { + 'package': 'chromium/third_party/kotlinc', + 'version': 'Rr02Gf2EkaeSs3EhSUHhPqDHSd1AzimrM6cRYUJCPjQC', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', }, + 'src/third_party/boringssl/src': - 'https://boringssl.googlesource.com/boringssl.git' + '@' + '3a667d10e94186fd503966f5638e134fe9fb4080', + 'https://boringssl.googlesource.com/boringssl.git' + '@' + '20a06474c0b4a16779311bfe98ba69dc2402101d', 
'src/base': { - 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'e9e639622449a893a1b5e32781d072cec08ead72', + 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'd407b7061bce341bb6e11b539ea86c46c949ac4c', 'condition': 'checkout_android', }, 'src/third_party/bazel': { @@ -131,20 +185,28 @@ deps = { 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_ndk': { - 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '401019bf85744311b26c88ced255cd53401af8b7', - 'condition': 'checkout_android', + 'src/third_party/android_toolchain': { + 'packages': [ + { + 'package': 'chromium/third_party/android_toolchain/android_toolchain', + 'version': 'R_8suM8m0oHbZ1awdxGXvKEFpAOETscbfZxkkMthyk8C', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', }, + 'src/third_party/androidx': { 'packages': [ { 'package': 'chromium/third_party/androidx', - 'version': '6d8ij5pzYh29WWjPbdbAWFBJSA1nUgkWf2p6wCVZKIsC', + 'version': 'y7rF_rx56mD3FGhMiqnlbQ6HOqHJ95xUFNX1m-_a988C', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_support_test_runner': { 'packages': [ { @@ -158,16 +220,12 @@ deps = { 'src/third_party/android_sdk/public': { 'packages': [ { - 'package': 'chromium/third_party/android_sdk/public/build-tools/31.0.0', - 'version': 'tRoD45SCi7UleQqSV7MrMQO1_e5P8ysphkCcj6z_cCQC', + 'package': 'chromium/third_party/android_sdk/public/build-tools/34.0.0', + 'version': 'YK9Rzw3fDzMHVzatNN6VlyoD_81amLZpN1AbmkdOd6AC', }, { 'package': 'chromium/third_party/android_sdk/public/emulator', - 'version': 'gMHhUuoQRKfxr-MBn3fNNXZtkAVXtOwMwT7kfx8jkIgC', - }, - { - 'package': 'chromium/third_party/android_sdk/public/extras', - 'version': 'ppQ4TnqDvBHQ3lXx5KPq97egzF5X2FFyOrVHkGmiTMQC', + 'version': '9lGp8nTUCRRWGMnI_96HcKfzjnxEJKUcfvfwmA3wXNkC', }, { 'package': 'chromium/third_party/android_sdk/public/patcher', @@ -175,11 +233,15 @@ deps = { }, { 'package': 
'chromium/third_party/android_sdk/public/platform-tools', - 'version': 'g7n_-r6yJd_SGRklujGB1wEt8iyr77FZTUJVS9w6O34C', + 'version': 'HWVsGs2HCKgSVv41FsOcsfJbNcB0UFiNrF6Tc4yRArYC', }, { - 'package': 'chromium/third_party/android_sdk/public/platforms/android-31', - 'version': 'lL3IGexKjYlwjO_1Ga-xwxgwbE_w-lmi2Zi1uOlWUIAC', + 'package': 'chromium/third_party/android_sdk/public/platforms/android-34', + 'version': 'u-bhWbTME6u-DjypTgr3ZikCyeAeU6txkR9ET6Uudc8C', + }, + { + 'package': 'chromium/third_party/android_sdk/public/platforms/android-tiramisuprivacysandbox', + 'version': 'YWMYkzyxGBgVsty0GhXL1oxbY0pGXQIgFc0Rh7ZMRPYC', }, { 'package': 'chromium/third_party/android_sdk/public/sources/android-31', @@ -187,7 +249,7 @@ deps = { }, { 'package': 'chromium/third_party/android_sdk/public/cmdline-tools', - 'version': 'Ez2NWws2SJYCF6qw2O-mSCqK6424l3ZdSTpppLyVR_cC', + 'version': 'EWnL2r7oV5GtE9Ef7GyohyFam42wtMtEKYU4dCb3U1YC', }, ], 'condition': 'checkout_android', @@ -207,7 +269,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/android_build_tools/aapt2', - 'version': 'version:3.6.0-alpha03-5516695-cr0', + 'version': 'STY0BXlZxsEhudnlXQFed-B5UpwehcoM0sYqor6qRqsC', }, ], 'condition': 'checkout_android', @@ -223,6 +285,16 @@ deps = { 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/byte_buddy/android_sdk_build_tools_25_0_2': { + 'packages': [ + { + 'package': 'chromium/third_party/android_sdk/public/build-tools', + 'version': 'kwIs2vdfTm93yEP8LG5aSnchN4BVEdVxbqQtF4XpPdkC', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, 'src/third_party/ced/src': { 'url': Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + 'ba412eaaacd3186085babcd901679a48863c7dd5', 'condition': 'checkout_android', @@ -267,7 +339,7 @@ deps = { }, 'src/third_party/icu': { - 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'bf66d373ae781a3498f2babe7b61d933dd774b82', + 'url': Var('chromium_git') + 
'/chromium/deps/icu.git' + '@' + 'e8c3bc9ea97d4423ad0515e5f1c064f486dae8b1', }, 'src/third_party/icu4j': { 'packages': [ @@ -293,11 +365,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/jdk', - 'version': 'PfRSnxe8Od6WU4zBXomq-zsgcJgWmm3z4gMQNB-r2QcC', - }, - { - 'package': 'chromium/third_party/jdk/extras', - 'version': 'fkhuOQ3r-zKtWEdKplpo6k0vKkjl-LY_rJTmtzFCQN4C', + 'version': 'GCFtf5t6M4HlrHj6NXedHbpHp2xjgognF8ptNci4478C', }, ], 'condition': 'checkout_android', @@ -308,22 +376,31 @@ deps = { 'condition': 'checkout_android', }, 'src/third_party/junit/src': { - 'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481', + 'url': Var('chromium_git') + '/external/junit.git' + '@' + '05fe2a64f59127c02135be22f416e91260d6ede6', 'condition': 'checkout_android', }, 'src/third_party/libunwindstack': { - 'url': Var('chromium_git') + '/chromium/src/third_party/libunwindstack.git' + '@' + '6868358481bb1e5e20d155c1084dc436c88b5e6b', + 'url': Var('chromium_git') + '/chromium/src/third_party/libunwindstack.git' + '@' + '4dbfa0e8c844c8e243b297bc185e54a99ff94f9e', 'condition': 'checkout_android', }, + 'src/third_party/ninja': { + 'packages': [ + { + 'package': 'infra/3pp/tools/ninja/${{platform}}', + 'version': Var('ninja_version'), + } + ], + 'dep_type': 'cipd', + }, 'src/third_party/mockito/src': { - 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '04a2a289a4222f80ad20717c25144981210d2eac', + 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '7c3641bcef717ffa7d765f2c86b847d0aab1aac9', 'condition': 'checkout_android', }, 'src/third_party/objenesis': { 'packages': [ { 'package': 'chromium/third_party/objenesis', - 'version': '9e367f55e5a65781ee77bfcbaa88fb82b30e75c0', + 'version': 'tknDblENYi8IaJYyD6tUahUyHYZlzJ_Y74_QZSz4DpIC', }, ], 'condition': 'checkout_android', @@ -343,7 +420,20 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/r8', - 'version': 
'Nu_mvQJe34CotIXadFlA3w732CJ9EvQGuVs4udcZedAC', + 'version': 'O1BBWiBTIeNUcraX8STMtQXVaCleu6SJJjWCcnfhPLkC', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, + # This duplication is intentional, so we avoid updating the r8.jar used by + # dexing unless necessary, since each update invalidates all incremental + # dexing and unnecessarily slows down all bots. + 'src/third_party/r8/d8': { + 'packages': [ + { + 'package': 'chromium/third_party/r8', + 'version': 'vw5kLlW3-suSlCKSO9OQpFWpR8oDnvQ8k1RgKNUapQYC', }, ], 'condition': 'checkout_android', @@ -360,14 +450,14 @@ deps = { 'dep_type': 'cipd', }, 'src/third_party/requests/src': { - 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'refs/tags/v2.23.0', + 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'c7e0fc087ceeadb8b4c84a0953a422c474093d6d', 'condition': 'checkout_android', }, 'src/third_party/robolectric': { 'packages': [ { 'package': 'chromium/third_party/robolectric', - 'version': 'iC6RDM5EH3GEAzR-1shW_Mg0FeeNE5shq1okkFfuuNQC', + 'version': 'hzetqh1qFI32FOgQroZvGcGdomrgVBJ6WKRnl1KFw6EC', }, ], 'condition': 'checkout_android', @@ -377,7 +467,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/sqlite4java', - 'version': '889660698187baa7c8b0d79f7bf58563125fbd66', + 'version': 'LofjKH9dgXIAJhRYCPQlMFywSwxYimrfDeBmaHc-Z5EC', }, ], 'condition': 'checkout_android', @@ -387,7 +477,7 @@ deps = { 'packages': [ { 'package': 'chromium/third_party/turbine', - 'version': 'Om6yIEXgJxuqghErK29h9RcMH6VaymMbxwScwXmcN6EC', + 'version': '2I2Nz480QsuCxpQ1lMfbigX8l5HAhX3_ykWU4TKRGo4C', }, ], 'condition': 'checkout_android', @@ -400,1718 +490,1822 @@ deps = { # iOS deps: 'src/ios': { - 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '81826d980c159f949c2c7901f4dbec9a09788964', + 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + 'ddd58e86cf4ebdc0db60a5d0f3c323de49bb295c', 'condition': 'checkout_ios' }, # 
Everything coming after this is automatically updated by the auto-roller. # === ANDROID_DEPS Generated Code Start === - + # Generated by //third_party/android_deps/fetch_all.py 'src/third_party/android_deps/libs/android_arch_core_common': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_core_common', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/android_arch_core_runtime': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_core_runtime', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/android_arch_lifecycle_common': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/android_arch_lifecycle_common_java8': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common_java8', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/android_arch_lifecycle_livedata': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_livedata', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/android_arch_lifecycle_livedata_core': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_livedata_core', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 
'cipd', }, + 'src/third_party/android_deps/libs/android_arch_lifecycle_runtime': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_runtime', - 'version': 'version:2@1.1.1.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/android_arch_lifecycle_viewmodel': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_viewmodel', - 'version': 'version:2@1.1.1.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/backport_util_concurrent_backport_util_concurrent': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/backport_util_concurrent_backport_util_concurrent', - 'version': 'version:2@3.1.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/classworlds_classworlds': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/classworlds_classworlds', - 'version': 'version:2@1.1-alpha-2.cr0', + 'version': 'version:2@1.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_animated_vector_drawable': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_animated_vector_drawable', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_appcompat_v7': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_appcompat_v7', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_asynclayoutinflater': { 'packages': [ { 'package': 
'chromium/third_party/android_deps/libs/com_android_support_asynclayoutinflater', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_cardview_v7': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_cardview_v7', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_collections': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_collections', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_coordinatorlayout': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_coordinatorlayout', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_cursoradapter': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_cursoradapter', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_customview': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_customview', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_design': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_design', - 'version': 'version:2@28.0.0.cr0', + 'version': 
'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_documentfile': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_documentfile', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_drawerlayout': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_drawerlayout', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_interpolator': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_interpolator', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_loader': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_loader', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_localbroadcastmanager': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_localbroadcastmanager', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_multidex': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_multidex', - 'version': 'version:2@1.0.0.cr0', + 'version': 'version:2@1.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 
'src/third_party/android_deps/libs/com_android_support_print': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_print', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_recyclerview_v7': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_recyclerview_v7', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_slidingpanelayout': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_slidingpanelayout', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_annotations', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_compat': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_compat', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_core_ui': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_ui', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_core_utils': { 'packages': [ { 
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_utils', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_fragment': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_fragment', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_media_compat': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_media_compat', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_v4': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_v4', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_support_vector_drawable': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_support_vector_drawable', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_swiperefreshlayout': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_swiperefreshlayout', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_transition': { 'packages': [ { 'package': 
'chromium/third_party/android_deps/libs/com_android_support_transition', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_versionedparcelable': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_versionedparcelable', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_support_viewpager': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_support_viewpager', - 'version': 'version:2@28.0.0.cr0', + 'version': 'version:2@28.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_android_tools_common': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_android_tools_common', - 'version': 'version:2@30.0.0-alpha10.cr0', + 'version': 'version:2@30.2.0-beta01.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs': { + + 'src/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs', - 'version': 'version:2@1.1.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api', + 'version': 'version:2@30.2.0-beta01.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs_configuration': { + + 'src/third_party/android_deps/libs/com_android_tools_sdk_common': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs_configuration', - 'version': 'version:2@1.1.1.cr0', + 'package': 
'chromium/third_party/android_deps/libs/com_android_tools_sdk_common', + 'version': 'version:2@30.2.0-beta01.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api': { + + 'src/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api', - 'version': 'version:2@30.0.0-alpha10.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine', + 'version': 'version:2@2.8.8.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_android_tools_sdk_common': { + + 'src/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/com_android_tools_sdk_common', - 'version': 'version:2@30.0.0-alpha10.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms', + 'version': 'version:2@1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine': { + + 'src/third_party/android_deps/libs/com_google_android_annotations': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine', - 'version': 'version:2@2.8.8.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_google_android_annotations', + 'version': 'version:2@4.1.1.4.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms': { + + 'src/third_party/android_deps/libs/com_google_android_apps_common_testing_accessibility_framework_accessibility_test_framework': { 'packages': [ { - 'package': 
'chromium/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms', - 'version': 'version:2@1.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_google_android_apps_common_testing_accessibility_framework_accessibility_test_framework', + 'version': 'version:2@4.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_datatransport_transport_api': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_datatransport_transport_api', - 'version': 'version:2@2.2.1.cr0', + 'version': 'version:2@2.2.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@20.1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone', - 'version': 'version:2@17.5.0.cr0', + 'version': 'version:2@18.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@18.0.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_base': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_base', - 'version': 'version:2@17.5.0.cr0', + 'version': 'version:2@18.0.1.cr1', }, ], 
'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_basement': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_basement', - 'version': 'version:2@17.5.0.cr0', + 'version': 'version:2@18.1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_clearcut': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_clearcut', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_cloud_messaging': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cloud_messaging', - 'version': 'version:2@16.0.0.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/com_google_android_gms_play_services_fido': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_fido', - 'version': 'version:2@19.0.0-beta.cr0', + 'version': 
'version:2@16.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_flags': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_flags', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_gcm': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_gcm', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_iid': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_iid', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@18.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_location': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_location', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@19.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_phenotype': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_phenotype', - 'version': 'version:2@17.0.0.cr0', + 
'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_places_placereport': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_places_placereport', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_stats': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_stats', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_tasks': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_tasks', - 'version': 'version:2@17.2.0.cr0', + 'version': 'version:2@18.0.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision', - 'version': 'version:2@18.0.0.cr0', + 'version': 'version:2@20.1.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common', - 'version': 'version:2@18.0.0.cr0', + 'version': 'version:2@19.1.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_android_material_material': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_android_material_material', - 'version': 
'version:2@1.6.0-alpha01.cr0', + 'version': 'version:2@1.7.0-alpha02.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/com_google_android_play_core': { + + 'src/third_party/android_deps/libs/com_google_android_play_core_common': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core', - 'version': 'version:2@1.10.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core_common', + 'version': 'version:2@2.0.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + + 'src/third_party/android_deps/libs/com_google_android_play_feature_delivery': { + 'packages': [ + { + 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_feature_delivery', + 'version': 'version:2@2.0.1.cr1', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, + 'src/third_party/android_deps/libs/com_google_auto_auto_common': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_auto_auto_common', - 'version': 'version:2@1.1.2.cr0', + 'version': 'version:2@1.2.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_auto_service_auto_service': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_auto_service_auto_service', - 'version': 'version:2@1.0-rc6.cr0', + 'version': 'version:2@1.0-rc6.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_auto_service_auto_service_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_auto_service_auto_service_annotations', - 'version': 'version:2@1.0-rc6.cr0', + 'version': 'version:2@1.0-rc6.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_auto_value_auto_value_annotations': { 'packages': [ { 
'package': 'chromium/third_party/android_deps/libs/com_google_auto_value_auto_value_annotations', - 'version': 'version:2@1.7.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/com_google_code_findbugs_jformatstring': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/com_google_code_findbugs_jformatstring', - 'version': 'version:2@3.0.0.cr0', + 'version': 'version:2@1.10.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_code_findbugs_jsr305': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_code_findbugs_jsr305', - 'version': 'version:2@3.0.2.cr0', + 'version': 'version:2@3.0.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_code_gson_gson': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_code_gson_gson', - 'version': 'version:2@2.8.0.cr0', + 'version': 'version:2@2.9.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_dagger_dagger': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger', - 'version': 'version:2@2.30.cr0', + 'version': 'version:2@2.30.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_dagger_dagger_compiler': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_compiler', - 'version': 'version:2@2.30.cr0', + 'version': 'version:2@2.30.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_dagger_dagger_producers': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_producers', - 'version': 'version:2@2.30.cr0', + 'version': 'version:2@2.30.cr1', }, ], 
'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_dagger_dagger_spi': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_spi', - 'version': 'version:2@2.30.cr0', + 'version': 'version:2@2.30.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_error_prone_annotation': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_annotation', - 'version': 'version:2@2.10.0.cr0', + 'version': 'version:2@2.11.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_error_prone_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_annotations', - 'version': 'version:2@2.10.0.cr0', + 'version': 'version:2@2.18.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_error_prone_check_api': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_check_api', - 'version': 'version:2@2.10.0.cr0', + 'version': 'version:2@2.11.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_error_prone_core': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_core', - 'version': 'version:2@2.10.0.cr0', + 'version': 'version:2@2.11.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_error_prone_type_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_type_annotations', - 'version': 'version:2@2.10.0.cr0', + 'version': 'version:2@2.11.0.cr1', }, ], 'condition': 
'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_javac': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_javac', - 'version': 'version:2@9+181-r4173-1.cr0', + 'version': 'version:2@9+181-r4173-1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_errorprone_javac_shaded': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_javac_shaded', - 'version': 'version:2@9-dev-r4023-3.cr0', + 'version': 'version:2@9-dev-r4023-3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_annotations', - 'version': 'version:2@16.0.0.cr0', + 'version': 'version:2@16.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_common': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_common', - 'version': 'version:2@19.5.0.cr0', + 'version': 'version:2@19.5.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_components': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_components', - 'version': 'version:2@16.1.0.cr0', + 'version': 'version:2@16.1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_encoders': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_encoders', - 'version': 'version:2@16.1.0.cr0', + 'version': 'version:2@16.1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 
'src/third_party/android_deps/libs/com_google_firebase_firebase_encoders_json': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_encoders_json', - 'version': 'version:2@17.1.0.cr0', + 'version': 'version:2@17.1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_iid': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_iid', - 'version': 'version:2@21.0.1.cr0', + 'version': 'version:2@21.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_iid_interop': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_iid_interop', - 'version': 'version:2@17.0.0.cr0', + 'version': 'version:2@17.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_installations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_installations', - 'version': 'version:2@16.3.5.cr0', + 'version': 'version:2@16.3.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_installations_interop': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_installations_interop', - 'version': 'version:2@16.0.1.cr0', + 'version': 'version:2@16.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_firebase_firebase_measurement_connector': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_measurement_connector', - 'version': 'version:2@18.0.0.cr0', + 'version': 'version:2@18.0.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 
'src/third_party/android_deps/libs/com_google_firebase_firebase_messaging': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_messaging', - 'version': 'version:2@21.0.1.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/com_google_flatbuffers_flatbuffers_java': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/com_google_flatbuffers_flatbuffers_java', - 'version': 'version:2@2.0.3.cr0', + 'version': 'version:2@21.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_googlejavaformat_google_java_format': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_googlejavaformat_google_java_format', - 'version': 'version:2@1.5.cr0', + 'version': 'version:2@1.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_guava_failureaccess': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_guava_failureaccess', - 'version': 'version:2@1.0.1.cr0', + 'version': 'version:2@1.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_guava_guava': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_guava_guava', - 'version': 'version:2@31.0-jre.cr0', + 'version': 'version:2@31.1-jre.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_guava_guava_android': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_guava_guava_android', - 'version': 'version:2@31.0-android.cr0', + 'version': 'version:2@31.1-android.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_guava_listenablefuture': { 'packages': [ { 'package': 
'chromium/third_party/android_deps/libs/com_google_guava_listenablefuture', - 'version': 'version:2@1.0.cr0', + 'version': 'version:2@1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_j2objc_j2objc_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_j2objc_j2objc_annotations', - 'version': 'version:2@1.3.cr0', + 'version': 'version:2@1.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_protobuf_protobuf_java': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_protobuf_protobuf_java', - 'version': 'version:2@3.4.0.cr0', + 'version': 'version:2@3.19.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_google_protobuf_protobuf_javalite': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_google_protobuf_protobuf_javalite', - 'version': 'version:2@3.13.0.cr0', + 'version': 'version:2@3.21.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_googlecode_java_diff_utils_diffutils': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_googlecode_java_diff_utils_diffutils', - 'version': 'version:2@1.3.0.cr0', + 'version': 'version:2@1.3.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_squareup_javapoet': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_squareup_javapoet', - 'version': 'version:2@1.13.0.cr0', + 'version': 'version:2@1.13.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/com_squareup_javawriter': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/com_squareup_javawriter', - 'version': 'version:2@2.1.1.cr0', - }, - ], - 'condition': 
'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils', - 'version': 'version:2@4.0.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/javax_annotation_javax_annotation_api': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/javax_annotation_javax_annotation_api', - 'version': 'version:2@1.3.2.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/javax_annotation_jsr250_api': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/javax_annotation_jsr250_api', - 'version': 'version:2@1.0.cr0', - }, - ], - 'condition': 'checkout_android', - 'dep_type': 'cipd', - }, - 'src/third_party/android_deps/libs/javax_inject_javax_inject': { - 'packages': [ - { - 'package': 'chromium/third_party/android_deps/libs/javax_inject_javax_inject', - 'version': 'version:2@1.cr0', + 'version': 'version:2@2.1.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/nekohtml_nekohtml': { + + 'src/third_party/android_deps/libs/com_squareup_okio_okio_jvm': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/nekohtml_nekohtml', - 'version': 'version:2@1.9.6.2.cr0', + 'package': 'chromium/third_party/android_deps/libs/com_squareup_okio_okio_jvm', + 'version': 'version:2@3.3.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/nekohtml_xercesminimal': { + + 'src/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/nekohtml_xercesminimal', - 'version': 'version:2@1.9.6.2.cr0', + 'package': 
'chromium/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm', + 'version': 'version:2@4.7.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/net_ltgt_gradle_incap_incap': { + + 'src/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/net_ltgt_gradle_incap_incap', - 'version': 'version:2@0.2.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils', + 'version': 'version:2@4.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/net_sf_kxml_kxml2': { + + 'src/third_party/android_deps/libs/io_grpc_grpc_api': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/net_sf_kxml_kxml2', - 'version': 'version:2@2.3.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_api', + 'version': 'version:2@1.49.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_ant_ant': { + + 'src/third_party/android_deps/libs/io_grpc_grpc_binder': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_ant_ant', - 'version': 'version:2@1.8.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_binder', + 'version': 'version:2@1.49.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_ant_ant_launcher': { + + 'src/third_party/android_deps/libs/io_grpc_grpc_context': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_ant_ant_launcher', - 'version': 'version:2@1.8.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_context', + 'version': 'version:2@1.49.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 
'src/third_party/android_deps/libs/org_apache_maven_maven_ant_tasks': { + + 'src/third_party/android_deps/libs/io_grpc_grpc_core': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_ant_tasks', - 'version': 'version:2@2.1.3.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_core', + 'version': 'version:2@1.49.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_artifact': { + + 'src/third_party/android_deps/libs/io_grpc_grpc_protobuf_lite': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_artifact', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_protobuf_lite', + 'version': 'version:2@1.49.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_artifact_manager': { + + 'src/third_party/android_deps/libs/io_grpc_grpc_stub': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_artifact_manager', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_stub', + 'version': 'version:2@1.49.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_error_diagnostics': { + + 'src/third_party/android_deps/libs/io_perfmark_perfmark_api': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_error_diagnostics', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/io_perfmark_perfmark_api', + 'version': 'version:2@0.25.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_model': { + + 'src/third_party/android_deps/libs/javax_annotation_javax_annotation_api': { 
'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_model', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/javax_annotation_javax_annotation_api', + 'version': 'version:2@1.3.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_plugin_registry': { + + 'src/third_party/android_deps/libs/javax_annotation_jsr250_api': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_plugin_registry', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/javax_annotation_jsr250_api', + 'version': 'version:2@1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_profile': { + + 'src/third_party/android_deps/libs/javax_inject_javax_inject': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_profile', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/javax_inject_javax_inject', + 'version': 'version:2@1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_project': { + + 'src/third_party/android_deps/libs/net_bytebuddy_byte_buddy': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_project', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy', + 'version': 'version:2@1.14.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_repository_metadata': { + + 'src/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_repository_metadata', - 'version': 
'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent', + 'version': 'version:2@1.14.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_maven_settings': { + + 'src/third_party/android_deps/libs/net_ltgt_gradle_incap_incap': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_settings', - 'version': 'version:2@2.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/net_ltgt_gradle_incap_incap', + 'version': 'version:2@0.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_file': { + + 'src/third_party/android_deps/libs/org_bouncycastle_bcprov_jdk18on': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_file', - 'version': 'version:2@1.0-beta-6.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_bouncycastle_bcprov_jdk18on', + 'version': 'version:2@1.72.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_lightweight': { + + 'src/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_lightweight', - 'version': 'version:2@1.0-beta-6.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup', + 'version': 'version:2@1.2.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_shared': { + + 'src/third_party/android_deps/libs/org_checkerframework_checker_compat_qual': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_shared', - 'version': 'version:2@1.0-beta-6.cr0', + 'package': 
'chromium/third_party/android_deps/libs/org_checkerframework_checker_compat_qual', + 'version': 'version:2@2.5.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_provider_api': { + + 'src/third_party/android_deps/libs/org_checkerframework_checker_qual': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_provider_api', - 'version': 'version:2@1.0-beta-6.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_qual', + 'version': 'version:2@3.25.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup': { + + 'src/third_party/android_deps/libs/org_checkerframework_checker_util': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup', - 'version': 'version:2@1.2.1.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_util', + 'version': 'version:2@3.25.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_checkerframework_checker_compat_qual': { + + 'src/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_compat_qual', - 'version': 'version:2@2.5.5.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone', + 'version': 'version:2@3.15.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_checkerframework_checker_qual': { + + 'src/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_qual', - 'version': 'version:2@3.12.0.cr0', + 'package': 
'chromium/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations', + 'version': 'version:2@1.21.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone': { + + 'src/third_party/android_deps/libs/org_conscrypt_conscrypt_openjdk_uber': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone', - 'version': 'version:2@3.15.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_conscrypt_conscrypt_openjdk_uber', + 'version': 'version:2@2.5.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations': { + + 'src/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations', - 'version': 'version:2@1.17.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit', + 'version': 'version:2@4.4.1.201607150455-r.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_container_default': { + + 'src/third_party/android_deps/libs/org_hamcrest_hamcrest': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_container_default', - 'version': 'version:2@1.0-alpha-9-stable-1.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_hamcrest_hamcrest', + 'version': 'version:2@2.2.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_interpolation': { + + 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_interpolation', - 'version': 
'version:2@1.11.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7', + 'version': 'version:2@1.8.20.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_utils': { + + 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_utils', - 'version': 'version:2@1.5.15.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8', + 'version': 'version:2@1.8.20.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit': { + + 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit', - 'version': 'version:2@4.4.1.201607150455-r.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android', + 'version': 'version:2@1.6.4.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_annotations': { + + 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_annotations', - 'version': 'version:2@13.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm', + 'version': 'version:2@1.6.4.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib': { + + 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_guava': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib', 
- 'version': 'version:2@1.6.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_guava', + 'version': 'version:2@1.6.4.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_common': { + + 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_common', - 'version': 'version:2@1.6.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm', + 'version': 'version:2@0.1.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7': { + + 'src/third_party/android_deps/libs/org_jsoup_jsoup': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7', - 'version': 'version:2@1.5.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_jsoup_jsoup', + 'version': 'version:2@1.15.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8': { + + 'src/third_party/android_deps/libs/org_mockito_mockito_android': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8', - 'version': 'version:2@1.5.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_android', + 'version': 'version:2@5.4.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android': { + + 'src/third_party/android_deps/libs/org_mockito_mockito_core': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android', - 'version': 
'version:2@1.5.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_core', + 'version': 'version:2@5.4.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm': { + + 'src/third_party/android_deps/libs/org_mockito_mockito_subclass': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm', - 'version': 'version:2@1.5.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_subclass', + 'version': 'version:2@5.4.0.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, - 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm': { + + 'src/third_party/android_deps/libs/org_objenesis_objenesis': { 'packages': [ { - 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm', - 'version': 'version:2@0.1.0.cr0', + 'package': 'chromium/third_party/android_deps/libs/org_objenesis_objenesis', + 'version': 'version:2@3.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_ow2_asm_asm': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm', - 'version': 'version:2@7.0.cr0', + 'version': 'version:2@9.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_ow2_asm_asm_analysis': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_analysis', - 'version': 'version:2@7.0.cr0', + 'version': 'version:2@9.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_ow2_asm_asm_commons': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_commons', - 'version': 'version:2@7.0.cr0', + 'version': 'version:2@9.5.cr1', }, ], 'condition': 
'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_ow2_asm_asm_tree': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_tree', - 'version': 'version:2@7.0.cr0', + 'version': 'version:2@9.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_ow2_asm_asm_util': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_util', - 'version': 'version:2@7.0.cr0', + 'version': 'version:2@9.5.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_pcollections_pcollections': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_pcollections_pcollections', - 'version': 'version:2@2.1.2.cr0', + 'version': 'version:2@3.1.4.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_annotations': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_annotations', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_junit': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_junit', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, + + 'src/third_party/android_deps/libs/org_robolectric_nativeruntime': { + 'packages': [ + { + 'package': 'chromium/third_party/android_deps/libs/org_robolectric_nativeruntime', + 'version': 'version:2@4.10.3.cr1', + }, + ], + 'condition': 'checkout_android', + 'dep_type': 'cipd', + }, + + 'src/third_party/android_deps/libs/org_robolectric_nativeruntime_dist_compat': { + 'packages': [ + { + 'package': 'chromium/third_party/android_deps/libs/org_robolectric_nativeruntime_dist_compat', 
+ 'version': 'version:2@1.0.1.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_pluginapi': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_pluginapi', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_plugins_maven_dependency_resolver': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_plugins_maven_dependency_resolver', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_resources': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_resources', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_robolectric': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_robolectric', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_sandbox': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_sandbox', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_shadowapi': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadowapi', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 
'src/third_party/android_deps/libs/org_robolectric_shadows_framework': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadows_framework', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_shadows_playservices': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadows_playservices', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_utils': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_utils', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', 'dep_type': 'cipd', }, + 'src/third_party/android_deps/libs/org_robolectric_utils_reflector': { 'packages': [ { 'package': 'chromium/third_party/android_deps/libs/org_robolectric_utils_reflector', - 'version': 'version:2@4.3.1.cr0', + 'version': 'version:2@4.10.3.cr1', }, ], 'condition': 'checkout_android', @@ -2197,30 +2391,75 @@ hooks = [ 'condition': 'checkout_mac', }, { - 'name': 'msan_chained_origins', + 'name': 'msan_chained_origins_focal', + 'pattern': '.', + 'condition': 'checkout_instrumented_libraries', + 'action': [ 'python3', + 'src/third_party/depot_tools/download_from_google_storage.py', + '--no_resume', + '--no_auth', + '--bucket', 'chromium-instrumented-libraries', + '-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1', + ], + }, + { + 'name': 'msan_no_origins_focal', + 'pattern': '.', + 'condition': 'checkout_instrumented_libraries', + 'action': [ 'python3', + 'src/third_party/depot_tools/download_from_google_storage.py', + '--no_resume', + '--no_auth', + '--bucket', 'chromium-instrumented-libraries', + '-s', 
'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1', + ], + }, + { + 'name': 'msan_chained_origins_focal', 'pattern': '.', 'condition': 'checkout_instrumented_libraries', 'action': [ 'python3', 'src/third_party/depot_tools/download_from_google_storage.py', - "--no_resume", - "--no_auth", - "--bucket", "chromium-instrumented-libraries", - "-s", "src/third_party/instrumented_libraries/binaries/msan-chained-origins.tgz.sha1", + '--no_resume', + '--no_auth', + '--bucket', 'chromium-instrumented-libraries', + '-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1', ], }, { - 'name': 'msan_no_origins', + 'name': 'msan_no_origins_focal', 'pattern': '.', 'condition': 'checkout_instrumented_libraries', 'action': [ 'python3', 'src/third_party/depot_tools/download_from_google_storage.py', - "--no_resume", - "--no_auth", - "--bucket", "chromium-instrumented-libraries", - "-s", "src/third_party/instrumented_libraries/binaries/msan-no-origins.tgz.sha1", + '--no_resume', + '--no_auth', + '--bucket', 'chromium-instrumented-libraries', + '-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1', ], }, { + 'name': 'Download Fuchsia SDK from GCS', + 'pattern': '.', + 'condition': 'checkout_fuchsia', + 'action': [ + 'python3', + 'src/build/fuchsia/update_sdk.py', + '--cipd-prefix={fuchsia_sdk_cipd_prefix}', + '--version={fuchsia_version}', + ], + }, + { + 'name': 'Download Fuchsia system images', + 'pattern': '.', + 'condition': 'checkout_fuchsia and checkout_fuchsia_product_bundles', + 'action': [ + 'python3', + 'src/build/fuchsia/update_product_bundles.py', + '{checkout_fuchsia_boot_images}', + ], + }, + { # Pull clang if needed or requested via GYP_DEFINES. # Note: On Win, this should run after win_toolchain, as it may use it. 
'name': 'clang', @@ -2238,7 +2477,9 @@ hooks = [ { 'name': 'clang_format_win', 'pattern': '.', - 'action': [ 'download_from_google_storage', + 'condition': 'host_os == "win"', + 'action': [ 'python3', + 'src/third_party/depot_tools/download_from_google_storage.py', '--no_resume', '--platform=win32', '--no_auth', @@ -2247,21 +2488,38 @@ hooks = [ ], }, { - 'name': 'clang_format_mac', + 'name': 'clang_format_mac_x64', 'pattern': '.', - 'action': [ 'download_from_google_storage', + 'condition': 'host_os == "mac" and host_cpu == "x64"', + 'action': [ 'python3', + 'src/third_party/depot_tools/download_from_google_storage.py', '--no_resume', '--platform=darwin', '--no_auth', '--bucket', 'chromium-clang-format', - '-s', 'src/buildtools/mac/clang-format.sha1', + '-s', 'src/buildtools/mac/clang-format.x64.sha1', + '-o', 'src/buildtools/mac/clang-format', ], }, { + 'name': 'clang_format_mac_arm64', + 'pattern': '.', + 'condition': 'host_os == "mac" and host_cpu == "arm64"', + 'action': [ 'python3', + 'src/third_party/depot_tools/download_from_google_storage.py', + '--no_resume', + '--no_auth', + '--bucket', 'chromium-clang-format', + '-s', 'src/buildtools/mac/clang-format.arm64.sha1', + '-o', 'src/buildtools/mac/clang-format', + ], + }, + { 'name': 'clang_format_linux', 'pattern': '.', 'condition': 'host_os == "linux"', - 'action': [ 'download_from_google_storage', + 'action': [ 'python3', + 'src/third_party/depot_tools/download_from_google_storage.py', '--no_resume', '--platform=linux*', '--no_auth', @@ -2304,18 +2562,6 @@ hooks = [ ], }, { - # We used to use src as a CIPD root. We moved it to a different directory - # in crrev.com/c/930178 but left the clobber here to ensure that that CL - # could be reverted safely. This can be safely removed once crbug.com/794764 - # is resolved. 
- 'name': 'Android Clobber Deprecated CIPD Root', - 'pattern': '.', - 'condition': 'checkout_android', - 'action': ['src/build/cipd/clobber_cipd_root.py', - '--root', 'src', - ], - }, - { 'name': 'Generate component metadata for tests', 'pattern': '.', 'action': [ diff --git a/files/DIR_METADATA b/DIR_METADATA index 8bc04f15..8bc04f15 100644 --- a/files/DIR_METADATA +++ b/DIR_METADATA @@ -1,4 +1,4 @@ -Copyright (c) 2011, Google Inc. All rights reserved. +Copyright 2011 The LibYuv Project Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1,14 +1,19 @@ -name: "libyuv" -description: - "libyuv is an open source project that includes YUV scaling and conversion " - "functionality." +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update libyuv +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md +name: "libyuv" +description: "libyuv is an open source project that includes YUV scaling and conversion functionality." 
third_party { - url { - type: GIT + license_type: NOTICE + last_upgrade_date { + year: 2024 + month: 1 + day: 11 + } + identifier { + type: "Git" value: "https://chromium.googlesource.com/libyuv/libyuv/" + version: "af6ac8265bbd07bcf977526458b60305c4304288" } - version: "d53f1beecdd8d959f7a3f2e19bd0bd7e7227a233" - last_upgrade_date { year: 2022 month: 8 day: 5 } - license_type: NOTICE } @@ -1,4 +1,11 @@ -fbarchard@google.com -phoglund@google.com -magjed@google.com -chz@google.com +mbonadei@chromium.org +fbarchard@chromium.org +magjed@chromium.org +wtc@google.com +jansson@google.com + +per-file *.gn=mbonadei@chromium.org,jansson@google.com +per-file .gitignore=* +per-file AUTHORS=* +per-file DEPS=* +per-file PRESUBMIT.py=mbonadei@chromium.org,jansson@google.com diff --git a/OWNERS.android b/OWNERS.android new file mode 100644 index 00000000..7529cb92 --- /dev/null +++ b/OWNERS.android @@ -0,0 +1 @@ +include platform/system/core:/janitors/OWNERS diff --git a/files/PRESUBMIT.py b/PRESUBMIT.py index d3901caf..d3901caf 100644 --- a/files/PRESUBMIT.py +++ b/PRESUBMIT.py diff --git a/files/README.chromium b/README.chromium index 3f68e21e..1389f285 100644 --- a/files/README.chromium +++ b/README.chromium @@ -1,8 +1,9 @@ Name: libyuv -URL: http://code.google.com/p/libyuv/ -Version: 1837 +URL: https://chromium.googlesource.com/libyuv/libyuv/ +Version: 1883 License: BSD License File: LICENSE +Shipped: yes Description: libyuv is an open source project that includes YUV conversion and scaling functionality. diff --git a/files/README.md b/README.md index db70b7f0..95eeb04c 100644 --- a/files/README.md +++ b/README.md @@ -7,6 +7,7 @@ * Optimized for SSSE3/AVX2 on x86/x64. * Optimized for Neon on Arm. * Optimized for MSA on Mips. +* Optimized for RVV on RISC-V. 
### Development diff --git a/README.version b/README.version deleted file mode 100644 index 5deb188e..00000000 --- a/README.version +++ /dev/null @@ -1,8 +0,0 @@ -Version: r1837 -BugComponent: 42195 -Owner: lajos -Local Modifications: - * Remove files/Android.mk (it messes with the android build system). - * Remove OWNERS files within files/ and all the subdirectories (except for - files/fuzz). Having these files breaks repo presubmit hooks since they - contain non @google.com email addresses. diff --git a/UPDATING b/UPDATING deleted file mode 100644 index 2679284c..00000000 --- a/UPDATING +++ /dev/null @@ -1,36 +0,0 @@ -To sync the libyuv checkout to an upstream revision, do the following: - -These commands are known to work from the external/libyuv directory of the -Android tree's checkout. - -Step 1: Remove the files/ subdirectory. - -$ rm -rf files - -Step 2: Clone the libyuv repository from upstream. - -$ git clone https://chromium.googlesource.com/libyuv/libyuv files - -Step 3 (optional): Checkout a specific commit/tag. - -$ cd files -$ git checkout <commit_or_tag> -$ cd .. - -Step 4: Remove files that aren't necessary (Android.mk, .git and OWNERS). - -$ rm files/Android.mk -$ rm -rf files/.git -$ find files/ -name "OWNERS" | xargs rm - -Step 5: Update the version and last_upgrade_date fields in the METADATA file. - -Step 6: Update README.version with the version (can be found in - files/include/libyuv/version.h) - -Step 7: If any local modifications are being done, update README.version and - this file with updated instructions. - -Step 8: Ensure that libyuv builds and camera and media related CTS tests are - passing. If there are any linker errors about missing symbols, try - updating frameworks/av/media/libstagefright/export.lds. 
diff --git a/files/build_overrides/build.gni b/build_overrides/build.gni index c8490313..d9d01d51 100644 --- a/files/build_overrides/build.gni +++ b/build_overrides/build.gni @@ -13,6 +13,9 @@ build_with_chromium = false # Some non-Chromium builds don't support building java targets. enable_java_templates = true +# Enables assertions on safety checks in libc++. +enable_safe_libcxx = true + # Allow using custom suppressions files (currently not used by libyuv). asan_suppressions_file = "//build/sanitizers/asan_suppressions.cc" lsan_suppressions_file = "//build/sanitizers/lsan_suppressions.cc" diff --git a/files/build_overrides/gtest.gni b/build_overrides/gtest.gni index d3c3f68c..d3c3f68c 100644 --- a/files/build_overrides/gtest.gni +++ b/build_overrides/gtest.gni diff --git a/build_overrides/partition_alloc.gni b/build_overrides/partition_alloc.gni new file mode 100644 index 00000000..dcf8ac2d --- /dev/null +++ b/build_overrides/partition_alloc.gni @@ -0,0 +1,17 @@ +# Copyright 2022 The LibYuv Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +# Use default values for PartitionAlloc as standalone library from +# base/allocator/partition_allocator/build_overrides/partition_alloc.gni +use_partition_alloc_as_malloc_default = false +use_allocator_shim_default = false +enable_backup_ref_ptr_support_default = false +enable_mte_checked_ptr_support_default = false +put_ref_count_in_previous_slot_default = false +enable_backup_ref_ptr_slow_checks_default = false +enable_dangling_raw_ptr_checks_default = false diff --git a/files/cleanup_links.py b/cleanup_links.py index 7d1eba9b..7d1eba9b 100755 --- a/files/cleanup_links.py +++ b/cleanup_links.py diff --git a/codereview.settings b/codereview.settings index 9782886f..b226fae5 100644 --- a/codereview.settings +++ b/codereview.settings @@ -1,5 +1,5 @@ -# This file is used by git cl to get repository specific information. +# This file is used by `git cl` to get repository specific information. +CODE_REVIEW_SERVER: codereview.chromium.org GERRIT_HOST: True PROJECT: libyuv -TRY_ON_UPLOAD: False VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/ diff --git a/files/docs/deprecated_builds.md b/docs/deprecated_builds.md index ba42966c..8edefd78 100644 --- a/files/docs/deprecated_builds.md +++ b/docs/deprecated_builds.md @@ -165,11 +165,11 @@ mipsel arm32 disassembly: - third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o + llvm-objdump -d out/Release/obj/source/libyuv.row_neon.o arm64 disassembly: - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o + llvm-objdump -d out/Release/obj/source/libyuv.row_neon64.o Running tests: diff --git a/files/docs/environment_variables.md b/docs/environment_variables.md index dd5d59fb..4eb09659 100644 --- a/files/docs/environment_variables.md +++ b/docs/environment_variables.md @@ -40,6 +40,9 @@ By default the 
cpu is detected and the most advanced form of SIMD is used. But LIBYUV_DISABLE_LSX LIBYUV_DISABLE_LASX +## RISCV CPUs + LIBYUV_DISABLE_RVV + # Test Width/Height/Repeat The unittests default to a small image (128x72) to run fast. This can be set by environment variable to test a specific resolutions. diff --git a/files/docs/filtering.md b/docs/filtering.md index 8696976e..8696976e 100644 --- a/files/docs/filtering.md +++ b/docs/filtering.md diff --git a/files/docs/formats.md b/docs/formats.md index 12ea9465..12ea9465 100644 --- a/files/docs/formats.md +++ b/docs/formats.md diff --git a/files/docs/getting_started.md b/docs/getting_started.md index 15b19ab2..f2f71b8b 100644 --- a/files/docs/getting_started.md +++ b/docs/getting_started.md @@ -139,11 +139,11 @@ mips arm disassembly: - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt + llvm-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt + llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt - third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt + llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt Caveat: Disassembly may require optimize_max be disabled in BUILD.gn @@ -220,6 +220,47 @@ Install cmake: http://www.cmake.org/ make -j4 make package +## Building RISC-V target with cmake + +### Prerequisite: build risc-v clang toolchain and qemu + +If you don't have prebuilt clang and riscv64 qemu, run the script to download source and build them. 
+ + ./riscv_script/prepare_toolchain_qemu.sh + +After running script, clang & qemu are built in `build-toolchain-qemu/riscv-clang/` & `build-toolchain-qemu/riscv-qemu/`. + +### Cross-compile for RISC-V target + cmake -B out/Release/ -DUNIT_TEST=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \ + -DTOOLCHAIN_PATH={TOOLCHAIN_PATH} \ + -DUSE_RVV=ON . + cmake --build out/Release/ + +#### Customized Compiler Flags + +Customized compiler flags are supported by `-DRISCV_COMPILER_FLAGS="xxx"`. +If `-DRISCV_COMPILER_FLAGS="xxx"` is manually assigned, other compile flags(e.g disable -march=xxx) will not be appended. + +Example: + + cmake -B out/Release/ -DUNIT_TEST=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \ + -DRISCV_COMPILER_FLAGS="-mcpu=sifive-x280" \ + . + +### Run on QEMU + +#### Run libyuv_unittest on QEMU + cd out/Release/ + USE_RVV=ON \ + TOOLCHAIN_PATH={TOOLCHAIN_PATH} \ + QEMU_PREFIX_PATH={QEMU_PREFIX_PATH} \ + ../../riscv_script/run_qemu.sh libyuv_unittest + + ## Setup for Arm Cross compile See also https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.html diff --git a/files/docs/rotation.md b/docs/rotation.md index a08430fd..a08430fd 100644 --- a/files/docs/rotation.md +++ b/docs/rotation.md diff --git a/files/download_vs_toolchain.py b/download_vs_toolchain.py index 6bc086d6..6bc086d6 100644 --- a/files/download_vs_toolchain.py +++ b/download_vs_toolchain.py diff --git a/files/Android.bp b/files/Android.bp deleted file mode 100644 index 36156287..00000000 --- a/files/Android.bp +++ /dev/null @@ -1,196 +0,0 @@ -package { - default_applicable_licenses: ["external_libyuv_files_license"], -} - -// Added automatically by a large-scale-change -// -// large-scale-change included anything that looked like it might be a license -// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc. 
-// -// Please consider removing redundant or irrelevant files from 'license_text:'. -// See: http://go/android-license-faq -license { - name: "external_libyuv_files_license", - visibility: [":__subpackages__"], - license_kinds: [ - "SPDX-license-identifier-BSD", - ], - license_text: [ - "LICENSE", - "PATENTS", - ], -} - -cc_library { - name: "libyuv", - vendor_available: true, - product_available: true, - host_supported: true, - vndk: { - enabled: true, - }, - - srcs: [ - "source/compare.cc", - "source/compare_common.cc", - "source/compare_gcc.cc", - "source/compare_msa.cc", - "source/compare_neon.cc", - "source/compare_neon64.cc", - "source/convert.cc", - "source/convert_argb.cc", - "source/convert_from.cc", - "source/convert_from_argb.cc", - "source/convert_jpeg.cc", - "source/convert_to_argb.cc", - "source/convert_to_i420.cc", - "source/cpu_id.cc", - "source/mjpeg_decoder.cc", - "source/mjpeg_validate.cc", - "source/planar_functions.cc", - "source/rotate.cc", - "source/rotate_any.cc", - "source/rotate_argb.cc", - "source/rotate_common.cc", - "source/rotate_gcc.cc", - "source/rotate_msa.cc", - "source/rotate_neon.cc", - "source/rotate_neon64.cc", - "source/row_any.cc", - "source/row_common.cc", - "source/row_gcc.cc", - "source/row_msa.cc", - "source/row_neon.cc", - "source/row_neon64.cc", - "source/scale.cc", - "source/scale_any.cc", - "source/scale_argb.cc", - "source/scale_common.cc", - "source/scale_gcc.cc", - "source/scale_msa.cc", - "source/scale_neon.cc", - "source/scale_neon64.cc", - "source/scale_rgb.cc", - "source/scale_uv.cc", - "source/video_common.cc", - ], - - cflags: [ - "-Wall", - "-Werror", - "-Wno-unused-parameter", - "-fexceptions", - "-DHAVE_JPEG", - ], - - arch: { - arm: { - cflags: ["-mfpu=neon"], - }, - }, - - shared_libs: ["libjpeg"], - - export_include_dirs: ["include"], - - apex_available: [ - "//apex_available:platform", - "com.android.media.swcodec", - ], - min_sdk_version: "29", -} - -// compatibilty static library until all uses of 
libyuv_static are replaced -// with libyuv (b/37646797) -cc_library_static { - name: "libyuv_static", - vendor_available: true, - whole_static_libs: ["libyuv"], - apex_available: [ - "//apex_available:platform", - "com.android.media.swcodec", - ], - min_sdk_version: "29", -} - -cc_test { - name: "libyuv_unittest", - static_libs: ["libyuv"], - shared_libs: ["libjpeg"], - cflags: ["-Wall", "-Werror"], - srcs: [ - "unit_test/basictypes_test.cc", - "unit_test/color_test.cc", - "unit_test/compare_test.cc", - "unit_test/convert_test.cc", - "unit_test/cpu_test.cc", - "unit_test/cpu_thread_test.cc", - "unit_test/math_test.cc", - "unit_test/planar_test.cc", - "unit_test/rotate_argb_test.cc", - "unit_test/rotate_test.cc", - "unit_test/scale_argb_test.cc", - "unit_test/scale_rgb_test.cc", - "unit_test/scale_test.cc", - "unit_test/scale_uv_test.cc", - "unit_test/unit_test.cc", - "unit_test/video_common_test.cc", - ], -} - -cc_test { - name: "compare", - gtest: false, - srcs: [ - "util/compare.cc", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "cpuid", - gtest: false, - srcs: [ - "util/cpuid.c", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "i444tonv12_eg", - gtest: false, - srcs: [ - "util/i444tonv12_eg.cc", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "psnr", - gtest: false, - srcs: [ - "util/psnr_main.cc", - "util/psnr.cc", - "util/ssim.cc", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "yuvconstants", - gtest: false, - srcs: [ - "util/yuvconstants.c", - ], - static_libs: ["libyuv"], -} - -cc_test { - name: "yuvconvert", - gtest: false, - srcs: [ - "util/yuvconvert.cc", - ], - static_libs: ["libyuv"], - shared_libs: ["libjpeg"], -} diff --git a/files/LICENSE b/files/LICENSE deleted file mode 100644 index c911747a..00000000 --- a/files/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -Copyright 2011 The LibYuv Project Authors. All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/files/codereview.settings b/files/codereview.settings deleted file mode 100644 index b226fae5..00000000 --- a/files/codereview.settings +++ /dev/null @@ -1,5 +0,0 @@ -# This file is used by `git cl` to get repository specific information. 
-CODE_REVIEW_SERVER: codereview.chromium.org -GERRIT_HOST: True -PROJECT: libyuv -VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/ diff --git a/files/public.mk b/files/public.mk deleted file mode 100644 index 1342307a..00000000 --- a/files/public.mk +++ /dev/null @@ -1,13 +0,0 @@ -# This file contains all the common make variables which are useful for -# anyone depending on this library. -# Note that dependencies on NDK are not directly listed since NDK auto adds -# them. - -LIBYUV_INCLUDES := $(LIBYUV_PATH)/include - -LIBYUV_C_FLAGS := - -LIBYUV_CPP_FLAGS := - -LIBYUV_LDLIBS := -LIBYUV_DEP_MODULES := diff --git a/files/source/compare_mmi.cc b/files/source/compare_mmi.cc deleted file mode 100644 index 7640d946..00000000 --- a/files/source/compare_mmi.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" - -#include "libyuv/compare_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -// Hakmem method for hamming distance. 
-uint32_t HammingDistance_MMI(const uint8_t* src_a, - const uint8_t* src_b, - int count) { - uint32_t diff = 0u; - - uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0; - uint64_t c1 = 0x5555555555555555; - uint64_t c2 = 0x3333333333333333; - uint64_t c3 = 0x0f0f0f0f0f0f0f0f; - uint32_t c4 = 0x01010101; - uint64_t s1 = 1, s2 = 2, s3 = 4; - __asm__ volatile( - "1: \n\t" - "ldc1 %[ta], 0(%[src_a]) \n\t" - "ldc1 %[tb], 0(%[src_b]) \n\t" - "xor %[temp], %[ta], %[tb] \n\t" - "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1 - "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1 - "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1 - "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2) - "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2 - "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2 - "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t - "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4 - "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4) - "and %[temp1], %[temp1], %[c3] \n\t" //&c3 - "dmfc1 $t0, %[temp1] \n\t" - "dsrl32 $t0, $t0, 0 \n\t " - "mul $t0, $t0, %[c4] \n\t" - "dsrl $t0, $t0, 24 \n\t" - "dadd %[diff], %[diff], $t0 \n\t" - "dmfc1 $t0, %[temp1] \n\t" - "mul $t0, $t0, %[c4] \n\t" - "dsrl $t0, $t0, 24 \n\t" - "dadd %[diff], %[diff], $t0 \n\t" - "daddiu %[src_a], %[src_a], 8 \n\t" - "daddiu %[src_b], %[src_b], 8 \n\t" - "addiu %[count], %[count], -8 \n\t" - "bgtz %[count], 1b \n\t" - "nop \n\t" - : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b), - [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp), - [temp1] "+f"(temp1) - : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1), - [s2] "f"(s2), [s3] "f"(s3) - : "memory"); - return diff; -} - -uint32_t SumSquareError_MMI(const uint8_t* src_a, - const uint8_t* src_b, - int count) { - uint32_t sse = 0u; - uint32_t sse_hi = 0u, sse_lo = 0u; - - uint64_t src1, src2; - uint64_t diff, diff_hi, diff_lo; - uint64_t sse_sum, sse_tmp; - - const uint64_t mask = 0x0ULL; - - __asm__ volatile( 
- "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t" - - "1: \n\t" - "ldc1 %[src1], 0x00(%[src_a]) \n\t" - "ldc1 %[src2], 0x00(%[src_b]) \n\t" - "pasubub %[diff], %[src1], %[src2] \n\t" - "punpcklbh %[diff_lo], %[diff], %[mask] \n\t" - "punpckhbh %[diff_hi], %[diff], %[mask] \n\t" - "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t" - "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t" - "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t" - "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t" - - "daddiu %[src_a], %[src_a], 0x08 \n\t" - "daddiu %[src_b], %[src_b], 0x08 \n\t" - "daddiu %[count], %[count], -0x08 \n\t" - "bnez %[count], 1b \n\t" - - "mfc1 %[sse_lo], %[sse_sum] \n\t" - "mfhc1 %[sse_hi], %[sse_sum] \n\t" - "daddu %[sse], %[sse_hi], %[sse_lo] \n\t" - : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1), - [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi), - [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp), - [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo) - : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count), - [mask] "f"(mask) - : "memory"); - - return sse; -} - -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/files/source/rotate_common.cc b/files/source/rotate_common.cc deleted file mode 100644 index ff212ade..00000000 --- a/files/source/rotate_common.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/rotate_row.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -void TransposeWx8_C(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width) { - int i; - for (i = 0; i < width; ++i) { - dst[0] = src[0 * src_stride]; - dst[1] = src[1 * src_stride]; - dst[2] = src[2 * src_stride]; - dst[3] = src[3 * src_stride]; - dst[4] = src[4 * src_stride]; - dst[5] = src[5 * src_stride]; - dst[6] = src[6 * src_stride]; - dst[7] = src[7 * src_stride]; - ++src; - dst += dst_stride; - } -} - -void TransposeUVWx8_C(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width) { - int i; - for (i = 0; i < width; ++i) { - dst_a[0] = src[0 * src_stride + 0]; - dst_b[0] = src[0 * src_stride + 1]; - dst_a[1] = src[1 * src_stride + 0]; - dst_b[1] = src[1 * src_stride + 1]; - dst_a[2] = src[2 * src_stride + 0]; - dst_b[2] = src[2 * src_stride + 1]; - dst_a[3] = src[3 * src_stride + 0]; - dst_b[3] = src[3 * src_stride + 1]; - dst_a[4] = src[4 * src_stride + 0]; - dst_b[4] = src[4 * src_stride + 1]; - dst_a[5] = src[5 * src_stride + 0]; - dst_b[5] = src[5 * src_stride + 1]; - dst_a[6] = src[6 * src_stride + 0]; - dst_b[6] = src[6 * src_stride + 1]; - dst_a[7] = src[7 * src_stride + 0]; - dst_b[7] = src[7 * src_stride + 1]; - src += 2; - dst_a += dst_stride_a; - dst_b += dst_stride_b; - } -} - -void TransposeWxH_C(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width, - int height) { - int i; - for (i = 0; i < width; ++i) { - int j; - for (j = 0; j < height; ++j) { - dst[i * dst_stride + j] = src[j * src_stride + i]; - } - } -} - -void TransposeUVWxH_C(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width, - int height) { - int i; - for (i = 0; i < width * 2; i += 2) { - int j; - for (j = 0; j < height; ++j) { - dst_a[j + ((i >> 1) * 
dst_stride_a)] = src[i + (j * src_stride)]; - dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; - } - } -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/files/source/rotate_mmi.cc b/files/source/rotate_mmi.cc deleted file mode 100644 index f8de6083..00000000 --- a/files/source/rotate_mmi.cc +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/rotate_row.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -void TransposeWx8_MMI(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width) { - uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13; - uint8_t* src_tmp = nullptr; - - __asm__ volatile( - "1: \n\t" - "ldc1 %[tmp12], 0x00(%[src]) \n\t" - "dadd %[src_tmp], %[src], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (00 10 01 11 02 12 03 13) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (04 14 05 15 06 16 07 17) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (20 30 21 31 22 32 23 33) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (24 34 25 35 26 36 27 37) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp4 = (00 10 20 
30 01 11 21 31) */ - "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t" - /* tmp5 = (02 12 22 32 03 13 23 33) */ - "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t" - /* tmp6 = (04 14 24 34 05 15 25 35) */ - "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t" - /* tmp7 = (06 16 26 36 07 17 27 37) */ - "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (40 50 41 51 42 52 43 53) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (44 54 45 55 46 56 47 57) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (60 70 61 71 62 72 63 73) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (64 74 65 75 66 76 67 77) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp8 = (40 50 60 70 41 51 61 71) */ - "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t" - /* tmp9 = (42 52 62 72 43 53 63 73) */ - "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t" - /* tmp10 = (44 54 64 74 45 55 65 75) */ - "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t" - /* tmp11 = (46 56 66 76 47 57 67 77) */ - "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t" - - /* tmp0 = (00 10 20 30 40 50 60 70) */ - "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t" - /* tmp1 = (01 11 21 31 41 51 61 71) */ - "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - /* tmp0 = (02 12 22 32 42 52 62 72) */ - "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t" - /* tmp1 = (03 13 23 33 43 53 63 73) */ - "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - 
"gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - /* tmp0 = (04 14 24 34 44 54 64 74) */ - "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t" - /* tmp1 = (05 15 25 35 45 55 65 75) */ - "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - /* tmp0 = (06 16 26 36 46 56 66 76) */ - "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t" - /* tmp1 = (07 17 27 37 47 57 67 77) */ - "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "daddi %[src], %[src], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - - : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), - [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5), - [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8), - [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11), - [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst), - [src_tmp] "+&r"(src_tmp) - : [src] "r"(src), [width] "r"(width), [src_stride] "r"(src_stride), - [dst_stride] "r"(dst_stride) - : "memory"); -} - -void TransposeUVWx8_MMI(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width) { - uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13; - uint8_t* src_tmp = nullptr; - - __asm__ volatile( - "1: \n\t" - /* tmp12 = 
(u00 v00 u01 v01 u02 v02 u03 v03) */ - "ldc1 %[tmp12], 0x00(%[src]) \n\t" - "dadd %[src_tmp], %[src], %[src_stride] \n\t" - /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */ - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */ - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */ - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */ - "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t" - /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */ - "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t" - /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */ - "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t" - /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */ - "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */ - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */ - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */ - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */ - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - 
"ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */ - "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t" - /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */ - "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t" - /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */ - "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t" - /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */ - "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t" - - /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */ - "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t" - /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */ - "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */ - "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t" - /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */ - "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t" - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */ - "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t" - /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */ - "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t" - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */ - "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t" - /* tmp1 = (v03 v13 
v23 v33 v43 v53 v63 v73) */ - "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t" - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "daddiu %[src], %[src], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), - [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5), - [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8), - [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11), - [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a), - [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp) - : [src] "r"(src), [width] "r"(width), [dst_stride_a] "r"(dst_stride_a), - [dst_stride_b] "r"(dst_stride_b), [src_stride] "r"(src_stride) - : "memory"); -} - -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/files/source/row_mmi.cc b/files/source/row_mmi.cc deleted file mode 100644 index 362fd1cf..00000000 --- a/files/source/row_mmi.cc +++ /dev/null @@ -1,7842 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "libyuv/row.h" - -#include <string.h> // For memcpy and memset. 
- -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -// clang-format off - -void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - uint64_t src0, src1, dest; - const uint64_t mask = 0xff000000ULL; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask] \n\t" - "or %[src1], %[src1], %[mask] \n\t" - "punpcklwd %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask] \n\t" - "or %[src1], %[src1], %[mask] \n\t" - "punpcklwd %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width), - [mask] "f"(mask) - : "memory"); -} - -void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) { - uint64_t src0, src1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0xff000000ULL; - const uint64_t mask2 = 0xc6; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask1] \n\t" - "punpcklbh %[src0], 
%[src0], %[mask0] \n\t" - "pshufh %[src0], %[src0], %[mask2] \n\t" - "or %[src1], %[src1], %[mask1] \n\t" - "punpcklbh %[src1], %[src1], %[mask0] \n\t" - "pshufh %[src1], %[src1], %[mask2] \n\t" - "packushb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask1] \n\t" - "punpcklbh %[src0], %[src0], %[mask0] \n\t" - "pshufh %[src0], %[src0], %[mask2] \n\t" - "or %[src1], %[src1], %[mask1] \n\t" - "punpcklbh %[src1], %[src1], %[mask0] \n\t" - "pshufh %[src1], %[src1], %[mask2] \n\t" - "packushb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width) - : "memory"); -} - -void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { - uint64_t src0, src1; - uint64_t ftmp[4]; - uint64_t mask0 = 0xc6; - uint64_t mask1 = 0x6c; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t" - "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t" - "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t" - - "punpcklbh %[ftmp0], %[src0], %[zero] \n\t" - "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t" - "punpckhbh %[ftmp1], %[src0], %[zero] \n\t" - "punpcklbh %[src1], %[src1], %[zero] \n\t" - "pextrh %[ftmp2], %[ftmp0], %[three] \n\t" - "pextrh %[ftmp3], %[ftmp1], %[one] \n\t" - "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t" - "pextrh %[ftmp3], %[ftmp1], %[two] 
\n\t" - "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "pshufh %[src1], %[src1], %[mask1] \n\t" - "pextrh %[ftmp2], %[src1], %[zero] \n\t" - "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t" - "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" - "packushb %[src1], %[src1], %[zero] \n\t" - - "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t" - "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t" - "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t" - "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t" - - "daddiu %[src_raw], %[src_raw], 0x0c \n\t" - "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]), - [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]) - : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width), - [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00), - [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03) - : "memory"); -} - -void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565, - uint8_t* dst_argb, - int width) { - uint64_t ftmp[5]; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0007000700070007; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psrlh %[r], %[src1], %[three] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[two] \n\t" - "psrlh %[src1], %[g], %[four] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - 
"packushb %[b], %[b], %[r] \n\t" - "packushb %[g], %[g], %[c1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" - "punpckhhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" - "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t" - "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), - [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]) - : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), - [four] "f"(0x04) - : "memory"); -} - -void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555, - uint8_t* dst_argb, - int width) { - uint64_t ftmp[6]; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0003000300030003; - uint64_t c3 = 0x007c007c007c007c; - uint64_t c4 = 0x0001000100010001; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "and %[r], %[src1], %[c3] \n\t" - "psrlh %[r], %[r], %[two] \n\t" - "psrlh %[a], %[src1], %[seven] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[three] \n\t" - "psrlh %[src1], %[g], %[two] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or 
%[r], %[src0], %[src1] \n\t" - "xor %[a], %[a], %[c1] \n\t" - "paddb %[a], %[a], %[c4] \n\t" - "packushb %[b], %[b], %[r] \n\t" - "packushb %[g], %[g], %[a] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" - "punpckhhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" - "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t" - "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), - [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5]) - : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05), - [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07) - : "memory"); -} - -void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444, - uint8_t* dst_argb, - int width) { - uint64_t ftmp[6]; - uint64_t c0 = 0x000f000f000f000f; - uint64_t c1 = 0x00ff00ff00ff00ff; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g], %[src0], %[four] \n\t" - "and %[r], %[src1], %[c0] \n\t" - "psrlh %[a], %[src1], %[four] \n\t" - "psllh %[src0], %[b], %[four] \n\t" - "or %[b], %[src0], %[b] \n\t" - "psllh %[src0], %[g], %[four] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psllh %[src0], %[r], %[four] \n\t" - "or %[r], %[src0], %[r] \n\t" - "psllh %[src0], %[a], %[four] \n\t" - "or %[a], %[src0], %[a] \n\t" - "packushb %[b], %[b], %[r] \n\t" - "packushb %[g], %[g], %[a] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh 
%[src1], %[b], %[g] \n\t" - "punpcklhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" - "punpckhhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" - "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t" - "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), - [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5]) - : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08), - [four] "f"(0x04) - : "memory"); -} - -void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - uint64_t src; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t" - - "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t" - - "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t" - - "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width) - : "memory"); -} - -void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - uint64_t src0, src1; - uint64_t ftmp[3]; - uint64_t mask0 = 0xc6; - uint64_t mask1 = 
0x18; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[ftmp0], %[src0], %[zero] \n\t" - "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t" - "punpckhbh %[ftmp1], %[src0], %[zero] \n\t" - "punpcklbh %[ftmp2], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - - "pextrh %[src0], %[ftmp1], %[two] \n\t" - "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t" - "pshufh %[ftmp1], %[ftmp1], %[one] \n\t" - - "pextrh %[src0], %[ftmp2], %[two] \n\t" - "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t" - "pextrh %[src0], %[ftmp2], %[one] \n\t" - "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t" - "pextrh %[src0], %[ftmp2], %[zero] \n\t" - "pshufh %[src1], %[src1], %[mask1] \n\t" - "pinsrh_0 %[src1], %[src1], %[src0] \n\t" - "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" - "packushb %[src1], %[src1], %[zero] \n\t" - - "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t" - "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t" - "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]), - [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00), - [one] "f"(0x01), [two] "f"(0x02) - : "memory"); -} - -void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - uint64_t src0, src1; - uint64_t ftmp[3]; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] 
\n\t" - "punpckhbh %[g], %[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - - "psrlh %[b], %[b], %[three] \n\t" - "psrlh %[g], %[g], %[two] \n\t" - "psrlh %[r], %[r], %[three] \n\t" - - "psllh %[g], %[g], %[five] \n\t" - "psllh %[r], %[r], %[eleven] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05), - [eleven] "f"(0x0b) - : "memory"); -} - -// dither4 is a row of 4 values from 4x4 dither matrix. -// The 4x4 matrix contains values to increase RGB. When converting to -// fewer bits (565) this provides an ordered dither. -// The order in the 4x4 matrix in first byte is upper left. -// The 4 values are passed as an int, then referenced as an array, so -// endian will not affect order of the original matrix. But the dither4 -// will containing the first pixel in the lower byte for little endian -// or the upper byte for big endian. 
// Convert 4 ARGB pixels per iteration to RGB565 with ordered dithering
// (Loongson MMI SIMD). Per the matrix description above: dither4 holds one
// row of a 4x4 dither matrix, one byte per pixel; the bytes are added to
// the B, G and R channels before truncation so the 8->5/6/5 bit reduction
// is ordered-dithered rather than plainly truncated.
//
// src_argb: 16 bytes (4 ARGB pixels) read per loop iteration.
// dst_rgb:  8 bytes (4 RGB565 pixels) written per loop iteration.
// width:    pixel count, processed 4 at a time; the loop runs until width
//           drops to 0 or below, so the caller is presumably expected to
//           pass a multiple of 4 -- TODO confirm against callers.
void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
                               uint8_t* dst_rgb,
                               const uint32_t dither4,
                               int width) {
  uint64_t src0, src1;
  uint64_t ftmp[3];
  // Saturation limit: 0x00ff in each of the four 16-bit lanes.
  uint64_t c0 = 0x00ff00ff00ff00ff;

  // NOTE(review): src_argb, dst_rgb, width and dither are declared as
  // input-only operands ("r"/"f") yet are modified inside the asm body
  // (daddiu on the pointers/counter, punpcklbh into %[dither]); strictly
  // they should be "+r"/"+f" input-output operands -- TODO confirm, the
  // sibling MMI rows in this file share the same pattern.
  __asm__ volatile(
      // Widen the 4 dither bytes to 4 halfwords, one per pixel lane.
      "punpcklbh  %[dither],   %[dither],    %[zero]       \n\t"
      "1:                                                  \n\t"
      // Unaligned-safe load of 16 bytes = 4 ARGB pixels.
      "gsldrc1    %[src0],     0x00(%[src_argb])           \n\t"
      "gsldlc1    %[src0],     0x07(%[src_argb])           \n\t"
      "gsldrc1    %[src1],     0x08(%[src_argb])           \n\t"
      "gsldlc1    %[src1],     0x0f(%[src_argb])           \n\t"

      // De-interleave the 4 pixels into per-channel halfword vectors;
      // after these shuffles b/g/r each hold the 4 zero-extended values
      // of one channel (alpha is discarded).
      "punpcklbh  %[b],        %[src0],      %[src1]       \n\t"
      "punpckhbh  %[g],        %[src0],      %[src1]       \n\t"
      "punpcklbh  %[src0],     %[b],         %[g]          \n\t"
      "punpckhbh  %[src1],     %[b],         %[g]          \n\t"
      "punpcklbh  %[b],        %[src0],      %[zero]       \n\t"
      "punpckhbh  %[g],        %[src0],      %[zero]       \n\t"
      "punpcklbh  %[r],        %[src1],      %[zero]       \n\t"

      // Add the dither value, then clamp each lane to 255: when a lane
      // exceeds 0x00ff, pcmpgth yields all-ones for it, the or saturates
      // the lane, and the and masks it back down to 0x00ff.
      "paddh      %[b],        %[b],         %[dither]     \n\t"
      "paddh      %[g],        %[g],         %[dither]     \n\t"
      "paddh      %[r],        %[r],         %[dither]     \n\t"
      "pcmpgth    %[src0],     %[b],         %[c0]         \n\t"
      "or         %[src0],     %[src0],      %[b]          \n\t"
      "and        %[b],        %[src0],      %[c0]         \n\t"
      "pcmpgth    %[src0],     %[g],         %[c0]         \n\t"
      "or         %[src0],     %[src0],      %[g]          \n\t"
      "and        %[g],        %[src0],      %[c0]         \n\t"
      "pcmpgth    %[src0],     %[r],         %[c0]         \n\t"
      "or         %[src0],     %[src0],      %[r]          \n\t"
      "and        %[r],        %[src0],      %[c0]         \n\t"

      // Reduce to 5/6/5 significant bits per channel.
      "psrlh      %[b],        %[b],         %[three]      \n\t"
      "psrlh      %[g],        %[g],         %[two]        \n\t"
      "psrlh      %[r],        %[r],         %[three]      \n\t"

      // Pack each pixel as r<<11 | g<<5 | b.
      "psllh      %[g],        %[g],         %[five]       \n\t"
      "psllh      %[r],        %[r],         %[eleven]     \n\t"
      "or         %[b],        %[b],         %[g]          \n\t"
      "or         %[b],        %[b],         %[r]          \n\t"

      // Unaligned-safe store of 8 bytes = 4 RGB565 pixels.
      "gssdrc1    %[b],        0x00(%[dst_rgb])            \n\t"
      "gssdlc1    %[b],        0x07(%[dst_rgb])            \n\t"

      "daddiu     %[src_argb], %[src_argb],  0x10          \n\t"
      "daddiu     %[dst_rgb],  %[dst_rgb],   0x08          \n\t"
      "daddiu     %[width],    %[width],    -0x04          \n\t"
      "bgtz       %[width],    1b                          \n\t"
      : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
        [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
      : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
        [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
        [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)
      : "memory");
}
-void ARGBToARGB1555Row_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { - uint64_t src0, src1; - uint64_t ftmp[4]; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] \n\t" - "punpckhbh %[g], %[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - "punpckhbh %[a], %[src1], %[zero] \n\t" - - "psrlh %[b], %[b], %[three] \n\t" - "psrlh %[g], %[g], %[three] \n\t" - "psrlh %[r], %[r], %[three] \n\t" - "psrlh %[a], %[a], %[seven] \n\t" - - "psllh %[g], %[g], %[five] \n\t" - "psllh %[r], %[r], %[ten] \n\t" - "psllh %[a], %[a], %[fifteen] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - "or %[b], %[b], %[a] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05), - [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f) - : "memory"); -} - -void ARGBToARGB4444Row_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { - uint64_t src0, src1; - uint64_t ftmp[4]; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] \n\t" - "punpckhbh %[g], 
%[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - "punpckhbh %[a], %[src1], %[zero] \n\t" - - "psrlh %[b], %[b], %[four] \n\t" - "psrlh %[g], %[g], %[four] \n\t" - "psrlh %[r], %[r], %[four] \n\t" - "psrlh %[a], %[a], %[four] \n\t" - - "psllh %[g], %[g], %[four] \n\t" - "psllh %[r], %[r], %[eight] \n\t" - "psllh %[a], %[a], %[twelve] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - "or %[b], %[b], %[a] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08), - [twelve] "f"(0x0c) - : "memory"); -} - -void ARGBToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0001004200810019; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) 
\n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez 
%[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void ARGBToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0013002500380002; - const uint64_t mask_v = 0x00020038002f0009; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest0_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] 
\n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh 
%[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], 
%[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" 
- "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] 
"=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0019008100420001; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - 
"pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void BGRAToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0002003800250013; - const uint64_t mask_v = 0x0009002f00380002; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 
0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest0_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src1], %[src0] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src0], %[src1] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] 
\n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src1], %[src0] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src0], %[src1] \n\t" - "psraw 
%[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src1], %[src0] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], 
%[src0], %[src1] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src1], %[src0] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], 
%[src_hi] \n\t" - "psubw %[dest3_v], %[src0], %[src1] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0001001900810042; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" 
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - 
"packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void ABGRToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0002003800250013; - const uint64_t mask_v = 0x0009002F00380002; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t" - "dsll %[dest0_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) 
\n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src1], %[src0] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src0], %[src1] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t" - "dsll %[dest1_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - 
"gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src1], %[src0] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src0], %[src1] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t" - "dsll %[dest2_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], 
%[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src1], %[src0] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src0], %[src1] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t" - "dsll %[dest3_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] 
\n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src1], %[src0] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src0], %[src1] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] 
"=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0042008100190001; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - 
"gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void RGBAToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t 
tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0013002500380002; - const uint64_t mask_v = 0x00020038002f0009; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t" - "dsrl %[dest0_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - 
"punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t" - "dsrl %[dest1_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], 
%[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t" - "dsrl %[dest2_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] 
\n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t" - "dsrl %[dest3_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw 
%[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 
0x0001004200810019;

  __asm__ volatile(
      "1: \n\t"
      // Pixels 0-1 (bytes 0x00-0x05): bias + coefficient dot-product per pixel.
      "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
      "punpcklbh %[src_lo], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
      // Realign the second 3-byte pixel before widening its bytes.
      "dsll %[src], %[src], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
      "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
      "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
      "paddw %[dest0], %[dest0], %[src] \n\t"
      "psrlw %[dest0], %[dest0], %[eight] \n\t"

      // Pixels 2-3 (bytes 0x06-0x0b).
      "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
      "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
      "punpcklbh %[src_lo], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
      "dsll %[src], %[src], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
      "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
      "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
      "paddw %[dest1], %[dest1], %[src] \n\t"
      "psrlw %[dest1], %[dest1], %[eight] \n\t"

      // Pixels 4-5 (bytes 0x0c-0x11).
      "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
      "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
      "punpcklbh %[src_lo], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
      "dsll %[src], %[src], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
      "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
      "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
      "paddw %[dest2], %[dest2], %[src] \n\t"
      "psrlw %[dest2], %[dest2], %[eight] \n\t"

      // Pixels 6-7 (bytes 0x12-0x17).
      "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
      "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
      "punpcklbh %[src_lo], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
      "dsll %[src], %[src], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src], %[zero] \n\t"
      "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
      "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
      "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
      "paddw %[dest3], %[dest3], %[src] \n\t"
      "psrlw %[dest3], %[dest3], %[eight] \n\t"

      // Pack the eight Y results to bytes and store.
      "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
      "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
      "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
      "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
      "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"

      // Advance 8 pixels (0x18 = 24 bytes) and loop.
      "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
      "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"
      : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest3] "=&f"(dest3)
      : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
        [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
        [zero] "f"(0x00)
      : "memory");
}

// RGB24ToUVRow_MMI: compute one row of U and one row of V from two adjacent
// rows of packed 24-bit RGB pixels.  Each loop iteration consumes sixteen
// pixels (0x30 bytes) from both rows; loads step by 6 bytes (two pixels) with
// a dsll-by-8 realignment of the second pixel of each pair, averages each 2x2
// block (+1, >>1), applies mask_u/mask_v via pmaddhw, and stores 8 U and 8 V
// bytes.  The 0x4040 constant is inserted into lane 0 for U (after a
// dsll-by-16) and lane 3 for V.
void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
                      int src_stride_rgb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  uint64_t src_rgb1;
  uint64_t ftmp[13];
  uint64_t tmp[1];
  const uint64_t value = 0x4040;
  const uint64_t mask_u = 0x0013002500380002;
  const uint64_t mask_v = 0x00020038002f0009;

  __asm__ volatile(
      // ftmp12 = per-lane rounding term for the 2x2 average.
      "dli %[tmp0], 0x0001000100010001 \n\t"
      "dmtc1 %[tmp0], %[ftmp12] \n\t"
      "1: \n\t"
      // src_rgb1 = second source row (src_rgb + stride).
      "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
      // 2x2 block 1 (bytes 0x00-0x05 of each row).
      "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
      "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
      "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
      "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"

      // 2x2 block 2 (bytes 0x06-0x0b).
      "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[src_lo], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
      "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"

      // Combine blocks 1-2: finish the dot products by word subtraction and
      // scale by >>8 (operand order differs between U and V).
      "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
      "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
      "psubw %[dest0_u], %[src0], %[src1] \n\t"
      "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
      "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
      "psubw %[dest0_v], %[src1], %[src0] \n\t"
      "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"

      // 2x2 block 3 (bytes 0x0c-0x11).
      "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
      "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
      "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
      "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"

      // 2x2 block 4 (bytes 0x12-0x17).
      "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[src_lo], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
      "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"

      // Combine blocks 3-4.
      "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
      "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
      "psubw %[dest1_u], %[src0], %[src1] \n\t"
      "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
      "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
      "psubw %[dest1_v], %[src1], %[src0] \n\t"
      "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"

      // 2x2 block 5 (bytes 0x18-0x1d).
      "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
      "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
      "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
      "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"

      // 2x2 block 6 (bytes 0x1e-0x23).
      "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[src_lo], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
      "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"

      // Combine blocks 5-6.
      "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
      "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
      "psubw %[dest2_u], %[src0], %[src1] \n\t"
      "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
      "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
      "psubw %[dest2_v], %[src1], %[src0] \n\t"
      "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"

      // 2x2 block 7 (bytes 0x24-0x29).
      "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
      "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
      "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
      "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"

      // 2x2 block 8 (bytes 0x2a-0x2f).
      "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
      "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
      "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
      "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
      "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
      "dsll %[src0], %[src0], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
      "paddh %[src0], %[src_lo], %[src_hi] \n\t"
      "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_lo] \n\t"
      "dsll %[src1], %[src1], %[eight] \n\t"
      "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
      "paddh %[src0], %[src0], %[src_hi] \n\t"
      "paddh %[src0], %[src0], %[ftmp12] \n\t"
      "psrlh %[src0], %[src0], %[one] \n\t"
      "dsll %[src_lo], %[src0], %[sixteen] \n\t"
      "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
      "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
      "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
      "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"

      // Combine blocks 7-8.
      "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
      "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
      "psubw %[dest3_u], %[src0], %[src1] \n\t"
      "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
      "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
      "psubw %[dest3_v], %[src1], %[src0] \n\t"
      "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"

      // Pack the four U results to 8 bytes and store.
      "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
      "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
      "packushb %[dest0_u], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
      "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"

      // Pack the four V results to 8 bytes and store.
      "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
      "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
      "packushb %[dest0_v], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
      "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"

      // Advance 16 pixels (0x30 = 48 bytes) and loop while width > 0.
      "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
      "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
      "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
      "daddi %[width], %[width], -0x10 \n\t"
      "bgtz %[width], 1b \n\t"
      : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
        [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
        [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
        [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
        [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
        [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
        [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
      : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
        [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
        [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
        [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
        [sixteen] "f"(0x10)
      : "memory");
}

// RAWToYRow_MMI: convert packed RAW (reverse-order 24-bit RGB) pixels to luma;
// same structure as RGB24ToYRow_MMI with the coefficient lanes reversed in
// `mask`.  (Body continues past this chunk.)
void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  uint64_t src, src_hi, src_lo;
  uint64_t dest0, dest1, dest2, dest3;
  const uint64_t value = 0x1080;
  const uint64_t mask = 0x0001001900810042;

__asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x06(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x13(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x19(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x12(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - 
"pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x18 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void RAWToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0002003800250013; - const uint64_t mask_v = 0x0009002f00380002; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" 
- "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t" - "dsll %[dest0_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src1], %[src0] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src0], %[src1] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], 
%[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t" - "dsll %[dest1_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src1], %[src0] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src0], %[src1] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 
%[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t" - "dsll %[dest2_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src1], %[src0] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd 
%[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src0], %[src1] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t" - "dsll %[dest3_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd 
%[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src1], %[src0] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src0], %[src1] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void ARGBToYJRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest, dest0, dest1, dest2, dest3; - uint64_t tmp0, tmp1; - const uint64_t shift = 0x08; - const uint64_t value = 0x80; - const uint64_t mask0 = 0x0; 
- const uint64_t mask1 = 0x0001004D0096001DULL; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - - "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest2], %[dest2], %[shift] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh 
%[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest3], %[dest3], %[shift] \n\t" - - "packsswh %[tmp0], %[dest0], %[dest1] \n\t" - "packsswh %[tmp1], %[dest2], %[dest3] \n\t" - "packushb %[dest], %[tmp0], %[tmp1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), - [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0), - [tmp1] "=&f"(tmp1) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0), - [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value), - [width] "r"(width) - : "memory"); -} - -void ARGBToUVJRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[12]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0015002a003f0002; - const uint64_t mask_v = 0x0002003f0035000a; - - __asm__ volatile( - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest0_u], %[src0], %[sixteen] 
\n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] 
\n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - 
"gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh 
%[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] 
"r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), - [sixteen] "f"(0x10) - : "memory"); -} - -void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { - uint64_t ftmp[11]; - const uint64_t value = 0x1080108010801080; - const uint64_t mask = 0x0001004200810019; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0007000700070007; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psrlh %[r], %[src1], %[three] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[two] \n\t" - "psrlh %[src1], %[g], %[four] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[src0], %[src1] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] 
\n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[src0], %[src1] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psrlh %[r], %[src1], %[three] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[two] \n\t" - "psrlh %[src1], %[g], %[four] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[src0], %[src1] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[src0], %[src1] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) 
\n\t" - - "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddiu %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), - [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), - [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) - : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05), - [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04) - : "memory"); -} - -void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width) { - uint64_t ftmp[11]; - const uint64_t value = 0x1080108010801080; - const uint64_t mask = 0x0001004200810019; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0003000300030003; - uint64_t c3 = 0x007c007c007c007c; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "and %[r], %[src1], %[c3] \n\t" - "psrlh %[r], %[r], %[two] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[three] \n\t" - "psrlh %[src1], %[g], %[two] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] 
\n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[src0], %[src1] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[src0], %[src1] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "and %[r], %[src1], %[c3] \n\t" - "psrlh %[r], %[r], %[two] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[three] \n\t" - "psrlh %[src1], %[g], %[two] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[src0], %[src1] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - 
"punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[src0], %[src1] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddiu %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), - [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), - [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) - : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y), - [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0), - [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08), - [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07) - : "memory"); -} - -void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width) { - uint64_t ftmp[11]; - uint64_t value = 0x1080108010801080; - uint64_t mask = 0x0001004200810019; - uint64_t c0 = 0x000f000f000f000f; - uint64_t c1 = 0x00ff00ff00ff00ff; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g], %[src0], %[four] \n\t" - "and %[r], %[src1], %[c0] \n\t" - "psllh %[src0], %[b], %[four] \n\t" - "or %[b], %[src0], %[b] 
\n\t" - "psllh %[src0], %[g], %[four] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psllh %[src0], %[r], %[four] \n\t" - "or %[r], %[src0], %[r] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[src0], %[src1] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[src0], %[src1] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g], %[src0], %[four] \n\t" - "and %[r], %[src1], %[c0] \n\t" - "psllh %[src0], %[b], %[four] \n\t" - "or %[b], %[src0], %[b] \n\t" - "psllh %[src0], %[g], %[four] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psllh %[src0], %[r], %[four] \n\t" - "or %[r], %[src0], %[r] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[src0], %[src1] \n\t" - "psrlw %[dest2], %[dest2], %[eight] 
\n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[src0], %[src1] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddiu %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), - [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), - [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) - : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y), - [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0), - [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04) - : "memory"); -} - -void RGB565ToUVRow_MMI(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[13]; - uint64_t value = 0x2020202020202020; - uint64_t mask_u = 0x0026004a00700002; - uint64_t mask_v = 0x00020070005e0012; - uint64_t mask = 0x93; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0007000700070007; - __asm__ volatile( - "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x07(%[next_rgb565]) 
\n\t" - "psrlh %[dest0_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest0_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest0_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest0_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest0_v], %[src0], %[c2] \n\t" - "psllh %[dest0_v], %[dest0_v], %[three] \n\t" - "or %[dest0_v], %[src1], %[dest0_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest0_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest0_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] 
\n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t" - "psrlh %[dest1_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest1_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest1_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest1_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest1_v], %[src0], %[c2] \n\t" - "psllh %[dest1_v], %[dest1_v], %[three] \n\t" - "or %[dest1_v], %[src1], %[dest1_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest1_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest1_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest1_u], 
%[b0] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t" - "psrlh %[dest2_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest2_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest2_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest2_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest2_v], %[src0], %[c2] \n\t" - "psllh %[dest2_v], %[dest2_v], %[three] \n\t" - "or %[dest2_v], %[src1], %[dest2_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest2_u] \n\t" - "paddh %[g0], %[g0], %[dest2_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest2_u], %[dest2_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest2_u], %[dest2_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest2_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest2_u], 
%[dest2_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest2_v], %[g0] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t" - "psrlh %[dest3_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest3_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest3_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest3_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest3_v], %[src0], %[c2] \n\t" - "psllh %[dest3_v], %[dest3_v], %[three] \n\t" - "or %[dest3_v], %[src1], %[dest3_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest3_u] \n\t" - "paddh %[g0], %[g0], %[dest3_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest3_u], %[dest3_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh 
%[g0], %[dest3_u], %[dest3_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest3_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest3_v], %[g0] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t" - "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddiu %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), - [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), - [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), - [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]), - [dest3_v] "=&f"(ftmp[12]) - : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [value] "f"(value), [c0] "f"(c0), [c1] 
"f"(c1), [c2] "f"(c2), - [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), - [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03), - [one] "f"(0x01) - : "memory"); -} - -void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[11]; - uint64_t value = 0x2020202020202020; - uint64_t mask_u = 0x0026004a00700002; - uint64_t mask_v = 0x00020070005e0012; - uint64_t mask = 0x93; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0003000300030003; - uint64_t c3 = 0x007c007c007c007c; - __asm__ volatile( - "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t" - "psrlh %[dest0_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest0_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest0_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest0_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest0_v], %[src0], %[c2] \n\t" - "psllh %[dest0_v], %[dest0_v], %[three] \n\t" - "or %[dest0_v], %[src1], %[dest0_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest0_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" - "psrlh 
%[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest0_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t" - "psrlh %[dest1_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest1_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest1_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest1_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest1_v], %[src0], %[c2] \n\t" - "psllh %[dest1_v], %[dest1_v], %[three] \n\t" - "or %[dest1_v], %[src1], %[dest1_v] \n\t" - "and %[src0], 
%[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest1_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest1_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t" - "psrlh %[dest2_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh 
%[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest2_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest2_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest2_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest0_v], %[src0], %[c2] \n\t" - "psllh %[dest0_v], %[dest0_v], %[three] \n\t" - "or %[dest0_v], %[src1], %[dest0_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest2_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest2_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest2_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest2_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - 
"psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t" - "psrlh %[dest3_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest3_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest3_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest3_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest1_v], %[src0], %[c2] \n\t" - "psllh %[dest1_v], %[dest1_v], %[three] \n\t" - "or %[dest1_v], %[src1], %[dest1_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest3_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest3_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest3_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest3_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], 
%[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[dest0_u], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t" - "packushb %[dest0_v], %[dest1_u], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t" - "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddiu %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), - [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), - [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), - [dest1_v] "=&f"(ftmp[10]) - : [src_argb1555] "r"(src_argb1555), - [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u), - [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value), - [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), - [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), - [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03), - [two] "f"(0x02), [one] "f"(0x01) - : "memory"); -} - -void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[13]; 
- uint64_t value = 0x2020202020202020; - uint64_t mask_u = 0x0026004a00700002; - uint64_t mask_v = 0x00020070005e0012; - uint64_t mask = 0x93; - uint64_t c0 = 0x000f000f000f000f; - uint64_t c1 = 0x00ff00ff00ff00ff; - __asm__ volatile( - "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t" - "psrlh %[dest0_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest0_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest0_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest0_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest0_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest0_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - 
"pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t" - "psrlh %[dest1_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest1_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest1_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest1_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest1_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest1_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], 
%[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t" - "psrlh %[dest2_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest2_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest2_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest2_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest2_u] \n\t" - "paddh %[g0], %[g0], %[dest2_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest2_u], %[dest2_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest2_u], %[dest2_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest2_v], 
%[src0], %[mask_v] \n\t" - "pshufh %[dest2_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest2_v], %[g0] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t" - "psrlh %[dest3_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest3_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest3_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest3_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest3_u] \n\t" - "paddh %[g0], %[g0], %[dest3_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest3_u], %[dest3_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest3_u], %[dest3_v] \n\t" - 
"punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest3_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest3_v], %[g0] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t" - "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddiu %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), - [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), - [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), - [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]), - [dest3_v] "=&f"(ftmp[12]) - : [src_argb4444] "r"(src_argb4444), - [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u), - [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value), - [c0] "f"(c0), [c1] "f"(c1), [mask] 
"f"(mask), [mask_u] "f"(mask_u), - [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04), - [two] "f"(0x02) - : "memory"); -} - -void ARGBToUV444Row_MMI(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[12]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0026004a00700002; - const uint64_t mask_v = 0x00020070005e0012; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], 
%[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - 
"pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]), - [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]), - [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]), - [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]), - [dest3_v] "=&f"(ftmp[11]) - : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), - [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10), - [eight] "f"(0x08) - : "memory"); -} - -void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) { - uint64_t 
src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;
  uint64_t tmp0, tmp1;
  const uint64_t mask0 = 0x0;                      // zero vector for unpack
  const uint64_t mask1 = 0x01;                     // inserted into word 3
  const uint64_t mask2 = 0x0080004D0096001DULL;    // gray weights for pmaddhw
  const uint64_t mask3 = 0xFF000000FF000000ULL;    // alpha-byte mask (2 px)
  const uint64_t mask4 = ~mask3;                   // BGR-byte mask
  const uint64_t shift = 0x08;

  __asm__ volatile(
      "1: \n\t"
      // Load 2 ARGB pixels (8 bytes, unaligned).
      "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"

      // Save the two alpha bytes so they can be merged back at the end.
      "and %[src37], %[src], %[mask3] \n\t"

      // Low pixel: widen to halfwords, weight with mask2, horizontally sum
      // the two 32-bit partial products, then >>8 to get the gray value.
      "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
      "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t"
      "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t"
      "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t"
      "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t"

      // High pixel: same sequence on the upper 4 bytes.
      "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
      "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t"
      "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t"
      "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t"
      "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t"
      "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t"

      // Broadcast gray into B/G/R, clear the alpha lanes, then restore the
      // original alpha bytes.
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "and %[dest], %[dest], %[mask4] \n\t"
      "or %[dest], %[dest], %[src37] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      // 2 pixels (8 bytes) per iteration.
      "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
        [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
        [src37] "=&f"(src37)
      : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
        [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
        [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
      : "memory");
}

// Convert a row of image to Sepia tone.
// In-place transform: reads and writes dst_argb, 2 pixels per iteration.
// mask1/mask2/mask3 hold the per-channel sepia coefficient vectors for B/G/R;
// alpha bytes are preserved via mask4.
void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
  uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
  uint64_t tmp0, tmp1;
  const uint64_t mask0 = 0x0;
  const uint64_t mask1 = 0x002300440011ULL;     // sepia B coefficients
  const uint64_t mask2 = 0x002D00580016ULL;     // sepia G coefficients
  const uint64_t mask3 = 0x003200620018ULL;     // sepia R coefficients
  const uint64_t mask4 = 0xFF000000FF000000ULL; // alpha-byte mask
  const uint64_t shift = 0x07;

  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      // Save alpha bytes for the final merge.
      "and %[dest37], %[dest], %[mask4] \n\t"

      // Low pixel: three weighted dot products (B, G, R), horizontal sums,
      // >>7 scaling, packed back to halfwords.
      "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"
      "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"
      "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"
      "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"
      "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
      "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
      "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest0], %[dest0], %[shift] \n\t"
      "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
      "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
      "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest1], %[dest1], %[shift] \n\t"
      "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"

      // High pixel: identical sequence on the upper 4 bytes.
      "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"
      "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
      "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
      "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
      "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
      "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
      "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest0], %[dest0], %[shift] \n\t"
      "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
      "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
      "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
      "psrlw %[dest1], %[dest1], %[shift] \n\t"
      "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"

      // Pack to bytes and restore the saved alpha bytes.
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
      "or %[dest], %[dest], %[dest37] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
        [dest] "=&f"(dest)
      : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0),
        [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
        [mask4] "f"(mask4), [shift] "f"(shift)
      : "memory");
}

// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
// 2 pixels per iteration; the 4x4 int8 matrix is reloaded from matrix_argb
// each iteration, sign-extended to halfwords via psllh/psrah by 8.
// (Function continues past this chunk boundary.)
void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            const int8_t* matrix_argb,
                            int width) {
  uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
      dest3;
  uint64_t matrix, matrix_hi, matrix_lo;
  uint64_t tmp0, tmp1;
  const uint64_t shift0 = 0x06;  // result scale: >>6
  const uint64_t shift1 = 0x08;  // sign-extension shift for int8 matrix
  const uint64_t mask0 = 0x0;
  const uint64_t mask1 = 0x08;

  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"

      // Low pixel widened to halfwords.
      "punpcklbh %[src_lo], %[src], %[mask0] \n\t"

      // First half of the matrix (rows 0-1): load, sign-extend each int8
      // coefficient to int16, dot-product against the low pixel.
      "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest0], %[dest0], %[shift0] \n\t"

      // Second half of the matrix (rows 2-3) against the low pixel.
      "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
      "gsldrc1
%[matrix], 0x08(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest1], %[dest1], %[shift0] \n\t"

      // High pixel widened to halfwords.
      "punpckhbh %[src_hi], %[src], %[mask0] \n\t"

      // Matrix rows 0-1 against the high pixel (matrix reloaded and
      // sign-extended again).
      "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest2], %[dest2], %[shift0] \n\t"

      // Matrix rows 2-3 against the high pixel.
      "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
      "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
      "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
      "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
      "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
      "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
      "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
      "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
      "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
      "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
      "psraw %[dest3], %[dest3], %[shift0] \n\t"

      // Pack the four 32-bit channel groups to unsigned bytes and store
      // 2 output pixels.
      "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
      "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
      "packushb %[dest], %[tmp0], %[tmp1] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
        [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
        [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix)
      : [src_ptr] "r"(src_argb), [matrix_ptr] "r"(matrix_argb),
        [dst_ptr] "r"(dst_argb), [width] "r"(width), [shift0] "f"(shift0),
        [shift1] "f"(shift1), [mask0] "f"(mask0), [mask1] "f"(mask1)
      : "memory");
}

// Scale each channel of a row of ARGB pixels by the bytes of |value|.
// 2 pixels per iteration.  Each source byte b and scale byte v are widened
// by self-unpack (b*257, v*257 patterns), multiplied with pmulhuh and
// shifted, approximating (b * v) >> 8.
// NOTE(review): %[value] is modified by the punpcklbh inside the loop but is
// declared as an input-only "f" operand — relies on the loop re-reading the
// already-widened register; verify against the original build.
void ARGBShadeRow_MMI(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      int width,
                      uint32_t value) {
  uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
  const uint64_t shift = 0x08;

  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
      "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
      // Widen source bytes by duplicating them into both halves.
      "punpcklbh %[src_lo], %[src], %[src] \n\t"
      "punpckhbh %[src_hi], %[src], %[src] \n\t"

      // Widen the scale bytes the same way.
      "punpcklbh %[value], %[value], %[value] \n\t"

      // High 16 bits of the unsigned products, then >>8.
      "pmulhuh %[dest_lo], %[src_lo], %[value] \n\t"
      "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
      "pmulhuh %[dest_hi], %[src_hi], %[value] \n\t"
      "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
        [dest] "=&f"(dest)
      : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
        [value] "f"(value), [shift] "f"(shift)
      : "memory");
}

// Multiply two rows of ARGB pixels channel-wise: dst = (src0 * src1) >> 8
// (approximately; src0 bytes are widened by self-unpack, src1 by
// zero-extension, combined with pmulhuh).  2 pixels per iteration.
void ARGBMultiplyRow_MMI(const uint8_t* src_argb,
                         const uint8_t* src_argb1,
                         uint8_t* dst_argb,
                         int width) {
  uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
  uint64_t dest, dest_lo, dest_hi;
  const uint64_t mask = 0x0;

  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
      "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
      // src0 widened by duplication (b -> b*257 per halfword).
      "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
      "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"

      "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
      // src1 widened by zero-extension.
      "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
      "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"

      "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
      "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
      "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"

      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
      "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
        [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
        [src1] "=&f"(src1), [dest] "=&f"(dest)
      : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
        [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask)
      : "memory");
}

// Add two rows of ARGB pixels with per-byte unsigned saturation (paddusb).
// 2 pixels per iteration.  (Continues past this chunk boundary.)
void ARGBAddRow_MMI(const uint8_t* src_argb,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  uint64_t src0, src1, dest;

  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
      "gsldrc1
%[src0], 0x00(%[src0_ptr]) \n\t"
      "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
      // Saturating unsigned byte add.
      "paddusb %[dest], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
      "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
      : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
        [dst_ptr] "r"(dst_argb), [width] "r"(width)
      : "memory");
}

// Subtract two rows of ARGB pixels with per-byte unsigned saturation
// (psubusb; results clamp at 0).  2 pixels per iteration.
void ARGBSubtractRow_MMI(const uint8_t* src_argb,
                         const uint8_t* src_argb1,
                         uint8_t* dst_argb,
                         int width) {
  uint64_t src0, src1, dest;

  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
      "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
      "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
      "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
      "psubusb %[dest], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
      "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"

      "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
      "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
      "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
      "daddi %[width], %[width], -0x02 \n\t"
      "bnez %[width], 1b \n\t"
      : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
      : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
        [dst_ptr] "r"(dst_argb), [width] "r"(width)
      : "memory");
}

// Sobel functions which mimics SSSE3.

// Horizontal Sobel filter: for each column, sobel_x = |(a + 2b + c) -
// (a' + 2b' + c')| where a,b,c come from three adjacent rows at column i and
// the primed values at column i+2.  Processes 8 output bytes per iteration
// (two groups of 4 halfwords), clamped to [0,255] by packushb.
void SobelXRow_MMI(const uint8_t* src_y0,
                   const uint8_t* src_y1,
                   const uint8_t* src_y2,
                   uint8_t* dst_sobelx,
                   int width) {
  uint64_t y00 = 0, y10 = 0, y20 = 0;
  uint64_t y02 = 0, y12 = 0, y22 = 0;
  uint64_t zero = 0x0;
  uint64_t sobel = 0x0;
  __asm__ volatile(
      "1: \n\t"
      // First 4 columns: load row0/row1/row2 at i and i+2 (unaligned).
      "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t"  // a=src_y0[i]
      "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t"  // a_sub=src_y0[i+2]
      "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t"  // b=src_y1[i]
      "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t"  // b_sub=src_y1[i+2]
      "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"

      "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t"  // c=src_y2[i]
      "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
      "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t"  // c_sub=src_y2[i+2]
      "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"

      // Zero-extend the low 4 bytes of each load to halfwords.
      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y20], %[y20], %[zero] \n\t"

      "punpcklbh %[y02], %[y02], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"
      "punpcklbh %[y22], %[y22], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y10] \n\t"  // a+b
      "paddh %[y20], %[y20], %[y10] \n\t"  // c+b
      "paddh %[y00], %[y00], %[y20] \n\t"  // a+2b+c

      "paddh %[y02], %[y02], %[y12] \n\t"  // a_sub+b_sub
      "paddh %[y22], %[y22], %[y12] \n\t"  // c_sub+b_sub
      "paddh %[y02], %[y02], %[y22] \n\t"  // a_sub+2b_sub+c_sub

      // |x - y| computed as max(x,y) - min(x,y).
      "pmaxsh %[y10], %[y00], %[y02] \n\t"
      "pminsh %[y20], %[y00], %[y02] \n\t"
      "psubh %[sobel], %[y10], %[y20] \n\t"  // Abs

      // Next 4 columns (offsets +4 and +6), same computation.
      "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
      "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
      "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
      "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
      "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"

      "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
      "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
      "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
      "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"

      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y20], %[y20], %[zero] \n\t"

      "punpcklbh %[y02], %[y02], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"
      "punpcklbh %[y22], %[y22], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y10] \n\t"
      "paddh %[y20], %[y20], %[y10] \n\t"
      "paddh %[y00], %[y00], %[y20] \n\t"

      "paddh %[y02], %[y02], %[y12] \n\t"
      "paddh %[y22], %[y22], %[y12] \n\t"
      "paddh %[y02], %[y02], %[y22] \n\t"

      "pmaxsh %[y10], %[y00], %[y02] \n\t"
      "pminsh %[y20], %[y00], %[y02] \n\t"
      "psubh %[y00], %[y10], %[y20] \n\t"

      // Pack both halfword groups to 8 unsigned bytes and store.
      "packushb %[sobel], %[sobel], %[y00] \n\t"  // clamp255
      "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
      "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"

      "daddiu %[src_y0], %[src_y0], 8 \n\t"
      "daddiu %[src_y1], %[src_y1], 8 \n\t"
      "daddiu %[src_y2], %[src_y2], 8 \n\t"
      "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"  // branch delay slot
      : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
        [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
      : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2),
        [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero)
      : "memory");
}

// Vertical Sobel filter: sobel_y = |(a + 2b + c) - (a' + 2b' + c')| where
// a,b,c are three adjacent columns of row y0 and the primed values the same
// columns of row y1.  8 output bytes per iteration.
// (Continues past this chunk boundary.)
void SobelYRow_MMI(const uint8_t* src_y0,
                   const uint8_t* src_y1,
                   uint8_t* dst_sobely,
                   int width) {
  uint64_t y00 = 0, y01 = 0, y02 = 0;
  uint64_t y10 = 0, y11 = 0, y12 = 0;
  uint64_t zero = 0x0;
  uint64_t sobel = 0x0;
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t"  // a=src_y0[i]
      "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
      "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t"  // b=src_y0[i+1]
      "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t"  // c=src_y0[i+2]
      "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t"  // a_sub=src_y1[i]
"gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
      "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t"  // b_sub=src_y1[i+1]
      "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t"  // c_sub=src_y1[i+2]
      "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"

      // Zero-extend low 4 bytes of each load to halfwords.
      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y01], %[y01], %[zero] \n\t"
      "punpcklbh %[y02], %[y02], %[zero] \n\t"

      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y11], %[y11], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y01] \n\t"  // a+b
      "paddh %[y02], %[y02], %[y01] \n\t"  // c+b
      "paddh %[y00], %[y00], %[y02] \n\t"  // a+2b+c

      "paddh %[y10], %[y10], %[y11] \n\t"  // a_sub+b_sub
      "paddh %[y12], %[y12], %[y11] \n\t"  // c_sub+b_sub
      "paddh %[y10], %[y10], %[y12] \n\t"  // a_sub+2b_sub+c_sub

      // |x - y| via max - min.
      "pmaxsh %[y02], %[y00], %[y10] \n\t"
      "pminsh %[y12], %[y00], %[y10] \n\t"
      "psubh %[sobel], %[y02], %[y12] \n\t"  // Abs

      // Next 4 columns (offsets +4, +5, +6).
      "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
      "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
      "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
      "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
      "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
      "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"

      "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
      "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
      "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
      "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
      "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
      "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"

      "punpcklbh %[y00], %[y00], %[zero] \n\t"
      "punpcklbh %[y01], %[y01], %[zero] \n\t"
      "punpcklbh %[y02], %[y02], %[zero] \n\t"

      "punpcklbh %[y10], %[y10], %[zero] \n\t"
      "punpcklbh %[y11], %[y11], %[zero] \n\t"
      "punpcklbh %[y12], %[y12], %[zero] \n\t"

      "paddh %[y00], %[y00], %[y01] \n\t"
      "paddh %[y02], %[y02], %[y01] \n\t"
      "paddh %[y00], %[y00], %[y02] \n\t"

      "paddh %[y10], %[y10], %[y11] \n\t"
      "paddh %[y12], %[y12], %[y11] \n\t"
      "paddh %[y10], %[y10], %[y12] \n\t"

      "pmaxsh %[y02], %[y00], %[y10] \n\t"
      "pminsh %[y12], %[y00], %[y10] \n\t"
      "psubh %[y00], %[y02], %[y12] \n\t"

      // Pack to 8 unsigned bytes and store.
      "packushb %[sobel], %[sobel], %[y00] \n\t"  // clamp255
      "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
      "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"

      "daddiu %[src_y0], %[src_y0], 8 \n\t"
      "daddiu %[src_y1], %[src_y1], 8 \n\t"
      "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"  // branch delay slot
      : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
        [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
      : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1),
        [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero)
      : "memory");
}

// Combine sobelx and sobely rows into gray ARGB: s = sat(x + y), each output
// pixel is (255, s, s, s).  Consumes 8 sobel bytes and produces 8 ARGB
// pixels (32 bytes) per iteration.
void SobelRow_MMI(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  double temp[3];
  uint64_t c1 = 0xff000000ff000000;  // alpha = 255 for two pixels
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t"  // a=src_sobelx[i]
      "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
      "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t"  // b=src_sobely[i]
      "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
      // s7 s6 s5 s4 s3 s2 s1 s0 = a+b (saturating)
      "paddusb %[t2] , %[t0], %[t1] \n\t"

      // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
      "punpcklbh %[t0], %[t2], %[t2] \n\t"

      // s1 s1 s0 s0->s1 s1 s1 s1 s0 s0 s0 s0
      "punpcklbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      // 255 s1 s1 s1 255 s0 s0 s0
      "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"

      // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
      "punpckhbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      // 255 s3 s3 s3 255 s2 s2 s2
      "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"

      // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
      "punpckhbh %[t0], %[t2], %[t2] \n\t"

      // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
      "punpcklbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"

      // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
      "punpckhbh %[t1], %[t0], %[t0] \n\t"
      "or %[t1], %[t1], %[c1] \n\t"
      "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
      "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"

      "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
      "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
      "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"  // branch delay slot
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
      : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
        [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
      : "memory");
}

// Combine sobelx and sobely rows into a single gray plane:
// dst_y[i] = sat(sobelx[i] + sobely[i]).  8 bytes per iteration.
void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
                         const uint8_t* src_sobely,
                         uint8_t* dst_y,
                         int width) {
  uint64_t tr = 0;
  uint64_t tb = 0;
  __asm__ volatile(
      "1: \n\t"
      "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
      "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t"  // r=src_sobelx[i]
      "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
      "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t"  // b=src_sobely[i]
      "paddusb %[tr], %[tr], %[tb] \n\t"  // g
      "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
      "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"

      "daddiu %[dst_y], %[dst_y], 8 \n\t"
      "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
      "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
      "daddiu %[width], %[width], -8 \n\t"
      "bgtz %[width], 1b \n\t"
      "nop \n\t"  // branch delay slot
      : [tr] "=&f"(tr), [tb] "=&f"(tb)
      : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
        [dst_y] "r"(dst_y), [width] "r"(width)
      : "memory");
}

// Combine sobelx and sobely into ARGB (continues beyond this chunk; body
// incomplete in this view).
void SobelXYRow_MMI(const uint8_t* src_sobelx,
                    const uint8_t* src_sobely,
                    uint8_t* dst_argb,
                    int width) {
  uint64_t temp[3];
  uint64_t result = 0;
  uint64_t gb = 0;
  uint64_t cr = 0;
  uint64_t c1 = 0xffffffffffffffff;
  __asm__ volatile(
      "1: \n\t"
      "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t"  // r=src_sobelx[i]
      "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
      "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t"  // b=src_sobely[i]
      "gsldrc1 %[tb],
0x00(%[src_sobely]) \n\t" - "paddusb %[tg] , %[tr], %[tb] \n\t" // g - - // g3 b3 g2 b2 g1 b1 g0 b0 - "punpcklbh %[gb], %[tb], %[tg] \n\t" - // c3 r3 r2 r2 c1 r1 c0 r0 - "punpcklbh %[cr], %[tr], %[c1] \n\t" - // c1 r1 g1 b1 c0 r0 g0 b0 - "punpcklhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t" - // c3 r3 g3 b3 c2 r2 g2 b2 - "punpckhhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t" - - // g7 b7 g6 b6 g5 b5 g4 b4 - "punpckhbh %[gb], %[tb], %[tg] \n\t" - // c7 r7 c6 r6 c5 r5 c4 r4 - "punpckhbh %[cr], %[tr], %[c1] \n\t" - // c5 r5 g5 b5 c4 r4 g4 b4 - "punpcklhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t" - // c7 r7 g7 b7 c6 r6 g6 b6 - "punpckhhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t" - - "daddiu %[dst_argb], %[dst_argb], 32 \n\t" - "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t" - "daddiu %[src_sobely], %[src_sobely], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]), - [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result) - : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely), - [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1) - : "memory"); -} - -void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) { - // Copy a Y to RGB. 
- uint64_t src, dest; - const uint64_t mask0 = 0x00ffffff00ffffffULL; - const uint64_t mask1 = ~mask0; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src], %[src], %[src] \n\t" - "punpcklhw %[dest], %[src], %[src] \n\t" - "and %[dest], %[dest], %[mask0] \n\t" - "or %[dest], %[dest], %[mask1] \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - - "punpckhhw %[dest], %[src], %[src] \n\t" - "and %[dest], %[dest], %[mask0] \n\t" - "or %[dest], %[dest], %[mask1] \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest) - : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -// TODO - respect YuvConstants -void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, - const struct YuvConstants*, int width) { - uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x55; - const uint64_t mask2 = 0xAA; - const uint64_t mask3 = 0xFF; - const uint64_t mask4 = 0x4A354A354A354A35ULL; - const uint64_t mask5 = 0x0488048804880488ULL; - const uint64_t shift0 = 0x08; - const uint64_t shift1 = 0x06; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - - "pshufh %[src], %[src_lo], %[mask0] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - 
"pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_lo], %[mask1] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "pshufh %[src], %[src_lo], %[mask2] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_lo], %[mask3] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "pshufh %[src], %[src_hi], %[mask0] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_hi], %[mask1] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" 
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t" - - "pshufh %[src], %[src_hi], %[mask2] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_hi], %[mask3] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t" - - "daddi %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo) - : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3), - [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0), - [shift1] "f"(shift1), [width] "r"(width) - : "memory"); -} - -void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - uint64_t source, src0, src1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x1b; - - src += width - 1; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[source], 0(%[src_ptr]) \n\t" - "gsldrc1 %[source], -7(%[src_ptr]) \n\t" - "punpcklbh %[src0], %[source], %[mask0] \n\t" - "pshufh %[src0], %[src0], %[mask1] \n\t" - "punpckhbh %[src1], %[source], %[mask0] \n\t" - "pshufh %[src1], %[src1], %[mask1] \n\t" - 
"packushb %[dest], %[src1], %[src0] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddi %[src_ptr], %[src_ptr], -0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0), - [src1] "=&f"(src1) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void MirrorSplitUVRow_MMI(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src0, src1, dest0, dest1; - const uint64_t mask0 = 0x00ff00ff00ff00ffULL; - const uint64_t mask1 = 0x1b; - const uint64_t shift = 0x08; - - src_uv += (width - 1) << 1; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 1(%[src_ptr]) \n\t" - "gsldrc1 %[src0], -6(%[src_ptr]) \n\t" - "gsldlc1 %[src1], -7(%[src_ptr]) \n\t" - "gsldrc1 %[src1], -14(%[src_ptr]) \n\t" - - "and %[dest0], %[src0], %[mask0] \n\t" - "pshufh %[dest0], %[dest0], %[mask1] \n\t" - "and %[dest1], %[src1], %[mask0] \n\t" - "pshufh %[dest1], %[dest1], %[mask1] \n\t" - "packushb %[dest0], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t" - - "psrlh %[dest0], %[src0], %[shift] \n\t" - "pshufh %[dest0], %[dest0], %[mask1] \n\t" - "psrlh %[dest1], %[src1], %[shift] \n\t" - "pshufh %[dest1], %[dest1], %[mask1] \n\t" - "packushb %[dest0], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t" - - "daddi %[src_ptr], %[src_ptr], -0x10 \n\t" - "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t" - "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0), - [src1] "=&f"(src1) - : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v), - [width] 
"r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1), - [shift] "f"(shift) - : "memory"); -} - -void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - src += (width - 1) * 4; - uint64_t temp = 0x0; - uint64_t shuff = 0x4e; // 01 00 11 10 - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[temp], 3(%[src]) \n\t" - "gsldrc1 %[temp], -4(%[src]) \n\t" - "pshufh %[temp], %[temp], %[shuff] \n\t" - "gssdrc1 %[temp], 0x0(%[dst]) \n\t" - "gssdlc1 %[temp], 0x7(%[dst]) \n\t" - - "daddiu %[src], %[src], -0x08 \n\t" - "daddiu %[dst], %[dst], 0x08 \n\t" - "daddiu %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [temp] "=&f"(temp) - : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff) - : "memory"); -} - -void SplitUVRow_MMI(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[4]; - uint64_t shift = 0x08; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t" - - "and %[t2], %[t0], %[c0] \n\t" - "and %[t3], %[t1], %[c0] \n\t" - "packushb %[t2], %[t2], %[t3] \n\t" - "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t" - - "psrlh %[t2], %[t0], %[shift] \n\t" - "psrlh %[t3], %[t1], %[shift] \n\t" - "packushb %[t2], %[t2], %[t3] \n\t" - "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t" - - "daddiu %[src_uv], %[src_uv], 16 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), - [t3] "=&f"(temp[3]) - : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -void MergeUVRow_MMI(const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uv, - 
int width) { - uint64_t temp[3]; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x0(%[src_u]) \n\t" - "gsldlc1 %[t0], 0x7(%[src_u]) \n\t" - "gsldrc1 %[t1], 0x0(%[src_v]) \n\t" - "gsldlc1 %[t1], 0x7(%[src_v]) \n\t" - "punpcklbh %[t2], %[t0], %[t1] \n\t" - "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t" - "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t" - "punpckhbh %[t2], %[t0], %[t1] \n\t" - "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t" - "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t" - - "daddiu %[src_u], %[src_u], 8 \n\t" - "daddiu %[src_v], %[src_v], 8 \n\t" - "daddiu %[dst_uv], %[dst_uv], 16 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]) - : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v), - [width] "r"(width) - : "memory"); -} - -void SplitRGBRow_MMI(const uint8_t* src_rgb, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - int width) { - uint64_t src[4]; - uint64_t dest_hi, dest_lo, dest; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" - "punpcklbh %[dest_lo], %[src0], %[src1] \n\t" - "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t" - "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t" - "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t" - "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t" - "punpcklbh %[dest_hi], %[src2], %[src3] \n\t" - - "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t" - "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t" - "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t" - "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" - "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t" - "daddiu %[dstg_ptr], 
%[dstg_ptr], 0x04 \n\t" - "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]), - [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g), - [dstb_ptr] "r"(dst_b), [width] "r"(width) - : "memory"); -} - -void MergeRGBRow_MMI(const uint8_t* src_r, - const uint8_t* src_g, - const uint8_t* src_b, - uint8_t* dst_rgb, - int width) { - uint64_t srcr, srcg, srcb, dest; - uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo; - const uint64_t temp = 0x0; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t" - "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t" - "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t" - "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t" - "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t" - "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t" - - "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t" - "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t" - "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t" - "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t" - - "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t" - "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t" - "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t" - "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t" - "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t" - "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - 
"punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t" - "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t" - - "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t" - "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t" - "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb), - [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi), - [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi), - [srcbz_lo] "=&f"(srcbz_lo) - : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b), - [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp) - : "memory"); -} - -// Filter 2 rows of YUY2 UV's (422) into U and V (420). -void YUY2ToUVRow_MMI(const uint8_t* src_yuy2, - int src_stride_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t c0 = 0xff00ff00ff00ff00; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t temp[3]; - uint64_t data[4]; - uint64_t shift = 0x08; - uint64_t src_stride = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" - "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t" - "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t" - "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c1] \n\t" - "psrlh %[d1], 
%[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t" - "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c1] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), - [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), - [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride) - : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift) - : "memory"); -} - -// Copy row of YUY2 UV's (422) into U and V (422). 
-void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t c0 = 0xff00ff00ff00ff00; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t temp[2]; - uint64_t data[4]; - uint64_t shift = 0x08; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c1] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c1] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]), - [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) - : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift) - : "memory"); -} - -// Copy row of YUY2 Y's (422) into Y (420/422). 
-void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[2]; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t" - "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t" - "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t" - "daddiu %[dst_y], %[dst_y], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]) - : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width), - [c0] "f"(c0) - : "memory"); -} - -// Filter 2 rows of UYVY UV's (422) into U and V (420). -void UYVYToUVRow_MMI(const uint8_t* src_uyvy, - int src_stride_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - // Output a row of UV values. 
- uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[3]; - uint64_t data[4]; - uint64_t shift = 0x08; - uint64_t src_stride = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" - "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t" - "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t" - "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c0] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t" - "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c0] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), - [d0] "=&f"(data[0]), [d1] 
"=&f"(data[1]), [d2] "=&f"(data[2]), - [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride) - : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -// Copy row of UYVY UV's (422) into U and V (422). -void UYVYToUV422Row_MMI(const uint8_t* src_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - // Output a row of UV values. - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[2]; - uint64_t data[4]; - uint64_t shift = 0x08; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c0] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c0] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]), - [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) - : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [c0] 
"f"(c0), [shift] "f"(shift) - : "memory"); -} - -// Copy row of UYVY Y's (422) into Y (420/422). -void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { - // Output a row of Y values. - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t shift = 0x08; - uint64_t temp[2]; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t" - "dsrl %[t0], %[t0], %[shift] \n\t" - "dsrl %[t1], %[t1], %[shift] \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t" - "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t" - "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t" - "daddiu %[dst_y], %[dst_y], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]) - : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width), - [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -// Blend src_argb over src_argb1 and store to dst_argb. -// dst_argb may be src_argb or src_argb1. -// This code mimics the SSSE3 version for better testability. 
-void ARGBBlendRow_MMI(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi, - dest_lo; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL; - const uint64_t mask2 = 0x00FF00FF00FF00FFULL; - const uint64_t mask3 = 0xFF; - const uint64_t mask4 = ~mask1; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t" - - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t" - - "psubush %[alpha], %[mask2], %[src0_lo] \n\t" - "pshufh %[alpha], %[alpha], %[mask3] \n\t" - "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t" - - "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t" - "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t" - - "psubush %[alpha], %[mask2], %[src0_hi] \n\t" - "pshufh %[alpha], %[alpha], %[mask3] \n\t" - "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[mask4] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha), - [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), - [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo) - : 
[src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1), - [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4), - [shift] "f"(shift), [width] "r"(width) - : "memory"); -} - -void BlendPlaneRow_MMI(const uint8_t* src0, - const uint8_t* src1, - const uint8_t* alpha, - uint8_t* dst, - int width) { - uint64_t source0, source1, dest, alph; - uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi, - dest_lo; - uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL; - const uint64_t mask2 = 0x00FF00FF00FF00FFULL; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t" - "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t" - - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t" - "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t" - - "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t" - "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t" - "psubusb %[alpha_r], %[mask1], %[alpha] \n\t" - "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t" - "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t" - "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t" - "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t" - - "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t" - "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t" - "paddush %[dest_lo], %[dest_lo], %[dest] \n\t" - "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - - "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t" - "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t" - "paddush %[dest_hi], %[dest_hi], %[dest] \n\t" - "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - - "packushb %[dest], %[dest_lo], 
%[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph), - [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), - [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), - [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi), - [alpha_r] "=&f"(alpha_rev) - : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha), - [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width) - : "memory"); -} - -// Multiply source RGB by alpha and store to destination. -// This code mimics the SSSE3 version for better testability. 
-void ARGBAttenuateRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - int width) { - uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha; - const uint64_t mask0 = 0xFF; - const uint64_t mask1 = 0xFF000000FF000000ULL; - const uint64_t mask2 = ~mask1; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[src] \n\t" - "punpckhbh %[src_hi], %[src], %[src] \n\t" - - "pshufh %[alpha], %[src_lo], %[mask0] \n\t" - "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - "pshufh %[alpha], %[src_hi], %[mask0] \n\t" - "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "and %[dest], %[dest], %[mask2] \n\t" - "and %[src], %[src], %[mask1] \n\t" - "or %[dest], %[dest], %[src] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift), - [width] "r"(width) - : "memory"); -} - -void ComputeCumulativeSumRow_MMI(const uint8_t* row, - int32_t* cumsum, - const int32_t* previous_cumsum, - int width) { - int64_t row_sum[2] = {0, 0}; - uint64_t src, dest0, dest1, presrc0, presrc1, dest; - const uint64_t mask = 0x0; - - __asm__ volatile( - "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t" - "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t" - - "1: \n\t" - "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t" - "gslwrc1 %[src], 0x00(%[row_ptr]) 
\n\t" - - "punpcklbh %[src], %[src], %[mask] \n\t" - "punpcklhw %[dest0], %[src], %[mask] \n\t" - "punpckhhw %[dest1], %[src], %[mask] \n\t" - - "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t" - "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t" - - "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t" - "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t" - "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t" - "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t" - - "paddw %[dest0], %[row_sum0], %[presrc0] \n\t" - "paddw %[dest1], %[row_sum1], %[presrc1] \n\t" - - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t" - "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x01 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]), - [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0), - [presrc1] "=&f"(presrc1) - : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum), - [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask) - : "memory"); -} - -// C version 2x2 -> 2x1. 
-void InterpolateRow_MMI(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction) { - if (source_y_fraction == 0) { - __asm__ volatile( - "1: \n\t" - "ld $t0, 0x0(%[src_ptr]) \n\t" - "sd $t0, 0x0(%[dst_ptr]) \n\t" - "daddiu %[src_ptr], %[src_ptr], 8 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : - : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width) - : "memory"); - return; - } - if (source_y_fraction == 128) { - uint64_t uv = 0x0; - uint64_t uv_stride = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t" - "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t" - "daddu $t0, %[src_ptr], %[stride] \n\t" - "gsldrc1 %[uv_stride], 0x0($t0) \n\t" - "gsldlc1 %[uv_stride], 0x7($t0) \n\t" - - "pavgb %[uv], %[uv], %[uv_stride] \n\t" - "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t" - "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 8 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width), - [stride] "r"((int64_t)src_stride) - : "memory"); - return; - } - const uint8_t* src_ptr1 = src_ptr + src_stride; - uint64_t temp; - uint64_t data[4]; - uint64_t zero = 0x0; - uint64_t c0 = 0x0080008000800080; - uint64_t fy0 = 0x0100010001000100; - uint64_t shift = 0x8; - __asm__ volatile( - "pshufh %[fy1], %[fy1], %[zero] \n\t" - "psubh %[fy0], %[fy0], %[fy1] \n\t" - "1: \n\t" - "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t" - "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t" - "punpcklbh %[d0], %[t0], %[zero] \n\t" - "punpckhbh %[d1], %[t0], %[zero] \n\t" - "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t" - "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t" - "punpcklbh %[d2], %[t0], %[zero] \n\t" - "punpckhbh %[d3], %[t0], %[zero] \n\t" - - "pmullh %[d0], %[d0], %[fy0] 
\n\t" - "pmullh %[d2], %[d2], %[fy1] \n\t" - "paddh %[d0], %[d0], %[d2] \n\t" - "paddh %[d0], %[d0], %[c0] \n\t" - "psrlh %[d0], %[d0], %[shift] \n\t" - - "pmullh %[d1], %[d1], %[fy0] \n\t" - "pmullh %[d3], %[d3], %[fy1] \n\t" - "paddh %[d1], %[d1], %[d3] \n\t" - "paddh %[d1], %[d1], %[c0] \n\t" - "psrlh %[d1], %[d1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d1] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t" - "daddiu %[src_ptr], %[src_ptr], 8 \n\t" - "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), - [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) - : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1), - [dst_ptr] "r"(dst_ptr), [width] "r"(width), - [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0), - [shift] "f"(shift), [zero] "f"(zero) - : "memory"); -} - -// Use first 4 shuffler values to reorder ARGB channels. 
-void ARGBShuffleRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width) { - uint64_t source, dest0, dest1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) | - ((shuffler[2] & 0x03) << 4) | - ((shuffler[3] & 0x03) << 6); - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[dest0], %[src], %[mask0] \n\t" - "pshufh %[dest0], %[dest0], %[mask1] \n\t" - "punpckhbh %[dest1], %[src], %[mask0] \n\t" - "pshufh %[dest1], %[dest1], %[mask1] \n\t" - "packushb %[dest], %[dest0], %[dest1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void I422ToYUY2Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_frame, - int width) { - uint64_t temp[3]; - uint64_t vu = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i] - "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i] - "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i] - "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i] - "punpcklbh %[vu], %[tu], %[tv] \n\t" // g - "punpcklbh %[tu], %[ty], %[vu] \n\t" // g - "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t" - "punpckhbh %[tu], %[ty], %[vu] \n\t" // g - "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t" - 
"daddiu %[src_y], %[src_y], 8 \n\t" - "daddiu %[src_u], %[src_u], 4 \n\t" - "daddiu %[src_v], %[src_v], 4 \n\t" - "daddiu %[dst_frame], %[dst_frame], 16 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]), - [vu] "=&f"(vu) - : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), - [dst_frame] "r"(dst_frame), [width] "r"(width) - : "memory"); -} - -void I422ToUYVYRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_frame, - int width) { - uint64_t temp[3]; - uint64_t vu = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i] - "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i] - "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i] - "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i] - "punpcklbh %[vu], %[tu], %[tv] \n\t" // g - "punpcklbh %[tu], %[vu], %[ty] \n\t" // g - "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t" - "punpckhbh %[tu], %[vu], %[ty] \n\t" // g - "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t" - "daddiu %[src_y], %[src_y], 8 \n\t" - "daddiu %[src_u], %[src_u], 4 \n\t" - "daddiu %[src_v], %[src_v], 4 \n\t" - "daddiu %[dst_frame], %[dst_frame], 16 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]), - [vu] "=&f"(vu) - : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), - [dst_frame] "r"(dst_frame), [width] "r"(width) - : "memory"); -} - -void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - uint64_t source, dest; - const uint64_t mask0 = 0xff000000ff000000ULL; - const uint64_t mask1 = ~mask0; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 
0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "and %[src], %[src], %[mask0] \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[src], %[dest] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(source), [dest] "=&f"(dest) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb, - uint8_t* dst_a, - int width) { - uint64_t src, dest0, dest1, dest_lo, dest_hi, dest; - const uint64_t mask = 0xff000000ff000000ULL; - const uint64_t shift = 0x18; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "and %[dest0], %[src], %[mask] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t" - "and %[dest1], %[src], %[mask] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" - - "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t" - "and %[dest0], %[src], %[mask] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t" - "and %[dest1], %[src], %[mask] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - "packsswh %[dest_hi], %[dest0], %[dest1] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b 
\n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask), - [shift] "f"(shift), [width] "r"(width) - : "memory"); -} - -void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - uint64_t source, dest0, dest1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x00ffffff00ffffffULL; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[dest0], %[mask0], %[src] \n\t" - "punpcklhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "punpckhhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "punpckhbh %[dest0], %[mask0], %[src] \n\t" - "punpcklhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t" - "punpckhhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t" - "daddi %[width], %[width], 
-0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void I444ToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - __asm__ volatile ( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub - "or %[ub], %[ub], %[mask] \n\t"//must sign extension - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t"//sign extension - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - "punpcklbh %[u], %[u], %[zero] \n\t"//u - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - "punpcklbh %[v], %[v], %[zero] \n\t"//v - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug - "psubsh %[g_vec0], %[g_vec0], 
%[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); -} - -// Also used for 420 -void I422ToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t 
b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub - "or %[ub], %[ub], %[mask] \n\t"//must sign extension - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t"//sign extension - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t"//v - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], 
%[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); -} - -// 10 bit YUV to ARGB -void I210ToARGBRow_MMI(const uint16_t* src_y, - const uint16_t* src_u, - const uint16_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) 
\n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" - - "1: \n\t" - "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "psllh %[y], %[y], %[six] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "punpcklhw %[u], %[u], %[u] \n\t" - "psrah %[u], %[u], %[two] \n\t" - "punpcklhw %[v], %[v], %[v] \n\t" - "psrah %[v], %[v], %[two] \n\t" - "pminsh %[u], %[u], %[mask1] \n\t" - "pminsh %[v], %[v], %[mask1] \n\t" - - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 
%[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask), [two]"f"(0x02), - [mask1]"f"(0x00ff00ff00ff00ff) - : "memory" - ); -} - -void I422AlphaToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - const uint8_t* src_a, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v,a; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], 
%[mask] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t" - "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[a] \n\t" - "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], 
%[rgbbuf_ptr], 0x10                \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      // Outputs: all MMI/FP registers are early-clobber temporaries.
      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),               [a]"=&f"(a),
        [b_vec0]"=&f"(b_vec[0]),   [b_vec1]"=&f"(b_vec[1]),
        [g_vec0]"=&f"(g_vec[0]),   [g_vec1]"=&f"(g_vec[1]),
        [r_vec0]"=&f"(r_vec[0]),   [r_vec1]"=&f"(r_vec[1]),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [u_ptr]"r"(src_u),
        [v_ptr]"r"(src_v),         [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [a_ptr]"r"(src_a),         [zero]"f"(0x00),
        [six]"f"(0x6),             [five]"f"(0x55),
        [mask]"f"(mask)
      : "memory"
  );
}

// Convert 4 I422 pixels (4 Y, 2 U, 2 V bytes) per iteration to 12 bytes of
// packed RGB24 using Loongson MMI SIMD.
// The loop decrements width by 4 and exits on zero, so it assumes width is a
// positive multiple of 4 (callers pad via the _Any wrappers — TODO confirm).
void I422ToRGB24Row_MMI(const uint8_t* src_y,
                        const uint8_t* src_u,
                        const uint8_t* src_v,
                        uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) {
  uint64_t y,u,v;
  uint64_t b_vec[2],g_vec[2],r_vec[2];
  uint64_t mask = 0xff00ff00ff00ff00ULL;  // selects high byte of each halfword
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // Load conversion coefficients from yuvconstants (fixed layout:
      // ub@0x00, ug/vg@0x20, vr@0x40, bb@0x60, bg@0x80, br@0xa0, yg@0xc0)
      // and splat the u/v gains across all four halfwords.
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask]      \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask]      \n\t"

      "1:                                                       \n\t"
      // Unaligned 4-byte loads of Y, U, V.
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[u_ptr])               \n\t"
      "gslwrc1    %[u],            0x00(%[u_ptr])               \n\t"
      "gslwlc1    %[v],            0x03(%[v_ptr])               \n\t"
      "gslwrc1    %[v],            0x00(%[v_ptr])               \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"  // y*0x0101
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"  // y1

      // u3|u2|u1|u0 --> u1|u1|u0|u0
      "punpcklbh  %[u],            %[u],           %[u]         \n\t"  // u
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "paddsh     %[b_vec0],       %[y],           %[bb]        \n\t"
      "pmullh     %[b_vec1],       %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec0],       %[b_vec0],      %[b_vec1]    \n\t"
      "psrah      %[b_vec0],       %[b_vec0],      %[six]       \n\t"

      // v3|v2|v1|v0 --> v1|v1|v0|v0
      "punpcklbh  %[v],            %[v],           %[v]         \n\t"
      "punpcklbh  %[v],            %[v],           %[zero]      \n\t"
      "paddsh     %[g_vec0],       %[y],           %[bg]        \n\t"
      "pmullh     %[g_vec1],       %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec0],       %[g_vec0],      %[g_vec1]    \n\t"
      "pmullh     %[g_vec1],       %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec0],       %[g_vec0],      %[g_vec1]    \n\t"
      "psrah      %[g_vec0],       %[g_vec0],      %[six]       \n\t"

      "paddsh     %[r_vec0],       %[y],           %[br]        \n\t"
      "pmullh     %[r_vec1],       %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec0],       %[r_vec0],      %[r_vec1]    \n\t"
      "psrah      %[r_vec0],       %[r_vec0],      %[six]       \n\t"

      // Saturate to bytes and interleave B,G,R; then squeeze out the 4th
      // byte lane to emit 12 packed RGB24 bytes (8-byte + 4-byte stores).
      "packushb   %[r_vec0],       %[b_vec0],      %[r_vec0]    \n\t"
      "packushb   %[g_vec0],       %[g_vec0],      %[zero]      \n\t"
      "punpcklbh  %[b_vec0],       %[r_vec0],      %[g_vec0]    \n\t"
      "punpckhbh  %[r_vec0],       %[r_vec0],      %[g_vec0]    \n\t"
      "punpcklhw  %[g_vec0],       %[b_vec0],      %[r_vec0]    \n\t"
      "punpckhhw  %[g_vec1],       %[b_vec0],      %[r_vec0]    \n\t"

      "punpckhwd  %[r_vec0],       %[g_vec0],      %[g_vec0]    \n\t"
      "psllw      %[r_vec1],       %[r_vec0],      %[lmove1]    \n\t"
      "or         %[g_vec0],       %[g_vec0],      %[r_vec1]    \n\t"
      "psrlw      %[r_vec1],       %[r_vec0],      %[rmove1]    \n\t"
      "pextrh     %[r_vec1],       %[r_vec1],      %[zero]      \n\t"
      "pinsrh_2   %[g_vec0],       %[g_vec0],      %[r_vec1]    \n\t"
      "pextrh     %[r_vec1],       %[g_vec1],      %[zero]      \n\t"
      "pinsrh_3   %[g_vec0],       %[g_vec0],      %[r_vec1]    \n\t"
      "pextrh     %[r_vec1],       %[g_vec1],      %[one]       \n\t"
      "punpckhwd  %[g_vec1],       %[g_vec1],      %[g_vec1]    \n\t"
      "psllw      %[g_vec1],       %[g_vec1],      %[rmove1]    \n\t"
      "or         %[g_vec1],       %[g_vec1],      %[r_vec1]    \n\t"
      "gssdlc1    %[g_vec0],       0x07(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[g_vec0],       0x00(%[rgbbuf_ptr])          \n\t"
      "gsswlc1    %[g_vec1],       0x0b(%[rgbbuf_ptr])          \n\t"
      "gsswrc1    %[g_vec1],       0x08(%[rgbbuf_ptr])          \n\t"

      // Advance: 4 Y, 2 U, 2 V consumed; 12 RGB24 bytes produced.
      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[u_ptr],        %[u_ptr],       0x02         \n\t"
      "daddiu     %[v_ptr],        %[v_ptr],       0x02         \n\t"
      "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],  0x0c         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec0]"=&f"(b_vec[0]),   [b_vec1]"=&f"(b_vec[1]),
        [g_vec0]"=&f"(g_vec[0]),   [g_vec1]"=&f"(g_vec[1]),
        [r_vec0]"=&f"(r_vec[0]),   [r_vec1]"=&f"(r_vec[1]),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [u_ptr]"r"(src_u),
        [v_ptr]"r"(src_v),         [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask]"f"(mask),
        [lmove1]"f"(0x18),         [rmove1]"f"(0x8),
        [one]"f"(0x1)
      : "memory"
  );
}

// Convert 4 I422 pixels per iteration to 8 bytes of ARGB4444 (4 bits per
// channel) using Loongson MMI. Assumes width is a positive multiple of 4.
void I422ToARGB4444Row_MMI(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_argb4444,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // Coefficient setup — identical layout to the other I422 rows above.
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask]      \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask]      \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[u_ptr])               \n\t"
      "gslwrc1    %[u],            0x00(%[u_ptr])               \n\t"
      "gslwlc1    %[v],            0x03(%[v_ptr])               \n\t"
      "gslwrc1    %[v],            0x00(%[v_ptr])               \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"  // y*0x0101
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"  // y1

      // u3|u2|u1|u0 --> u1|u1|u0|u0
      "punpcklbh  %[u],            %[u],           %[u]         \n\t"  // u
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      // v3|v2|v1|v0 --> v1|v1|v0|v0
      "punpcklbh  %[v],            %[v],           %[v]         \n\t"
      "punpcklbh  %[v],            %[v],           %[zero]      \n\t"
      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      // Build 8888 ARGB in two registers, then reduce each byte to its
      // top nibble replicated (x >> 4 | x) and pack down to 4444.
      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklwd  %[g_vec],        %[g_vec],       %[alpha]     \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "and        %[g_vec],        %[g_vec],       %[mask1]     \n\t"
      "psrlw      %[g_vec],        %[g_vec],       %[four]      \n\t"
      "psrlw      %[r_vec],        %[g_vec],       %[four]      \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"
      "punpcklbh  %[r_vec],        %[alpha],       %[zero]      \n\t"
      "and        %[g_vec],        %[g_vec],       %[r_vec]     \n\t"

      "and        %[b_vec],        %[b_vec],       %[mask1]     \n\t"
      "psrlw      %[b_vec],        %[b_vec],       %[four]      \n\t"
      "psrlw      %[r_vec],        %[b_vec],       %[four]      \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpcklbh  %[r_vec],        %[alpha],       %[zero]      \n\t"
      "and        %[b_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[b_vec]     \n\t"

      "gssdlc1    %[g_vec],        0x07(%[dst_argb4444])        \n\t"
      "gssdrc1    %[g_vec],        0x00(%[dst_argb4444])        \n\t"

      // Advance: 4 Y, 2 U, 2 V consumed; 8 ARGB4444 bytes produced.
      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[u_ptr],        %[u_ptr],       0x02         \n\t"
      "daddiu     %[v_ptr],        %[v_ptr],       0x02         \n\t"
      "daddiu     %[dst_argb4444], %[dst_argb4444], 0x08        \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [u_ptr]"r"(src_u),
        [v_ptr]"r"(src_v),         [dst_argb4444]"r"(dst_argb4444),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask]"f"(0xff00ff00ff00ff00),
        [four]"f"(0x4),            [mask1]"f"(0xf0f0f0f0f0f0f0f0),
        [alpha]"f"(-1)
      : "memory"
  );
}

// Convert 4 I422 pixels per iteration to 8 bytes of ARGB1555 (5 bits per
// color channel, alpha bit forced to 1 via mask3). Assumes width is a
// positive multiple of 4.
void I422ToARGB1555Row_MMI(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_argb1555,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[u_ptr])               \n\t"
      "gslwrc1    %[u],            0x00(%[u_ptr])               \n\t"
      "gslwlc1    %[v],            0x03(%[v_ptr])               \n\t"
      "gslwrc1    %[v],            0x00(%[v_ptr])               \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      // u3|u2|u1|u0 --> u1|u1|u0|u0
      "punpcklbh  %[u],            %[u],           %[u]         \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      // v3|v2|v1|v0 --> v1|v1|v0|v0
      "punpcklbh  %[v],            %[v],           %[v]         \n\t"
      "punpcklbh  %[v],            %[v],           %[zero]      \n\t"
      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      // Per 32-bit pixel: take top 5 bits of B,G,R (>>3, mask2 = 0x1f per
      // word), shift into 5|5|5 position, set the alpha bit (mask3).
      "psrlw      %[temp],         %[g_vec],       %[three]     \n\t"
      "and        %[g_vec],        %[temp],        %[mask2]     \n\t"
      "psrlw      %[temp],         %[temp],        %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"
      "psrlw      %[temp],         %[temp],        %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"
      "or         %[g_vec],        %[g_vec],       %[mask3]     \n\t"

      "psrlw      %[temp],         %[b_vec],       %[three]     \n\t"
      "and        %[b_vec],        %[temp],        %[mask2]     \n\t"
      "psrlw      %[temp],         %[temp],        %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"
      "psrlw      %[temp],         %[temp],        %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"
      "or         %[b_vec],        %[b_vec],       %[mask3]     \n\t"

      // Gather the four 16-bit results into one 64-bit store.
      "punpcklhw  %[r_vec],        %[g_vec],       %[b_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[g_vec],       %[b_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[r_vec],       %[b_vec]     \n\t"

      "gssdlc1    %[g_vec],        0x07(%[dst_argb1555])        \n\t"
      "gssdrc1    %[g_vec],        0x00(%[dst_argb1555])        \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[u_ptr],        %[u_ptr],       0x02         \n\t"
      "daddiu     %[v_ptr],        %[v_ptr],       0x02         \n\t"
      "daddiu     %[dst_argb1555], %[dst_argb1555], 0x08        \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [u_ptr]"r"(src_u),
        [v_ptr]"r"(src_v),         [dst_argb1555]"r"(dst_argb1555),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [three]"f"(0x3),           [mask2]"f"(0x1f0000001f),
        [eight]"f"(0x8),           [mask3]"f"(0x800000008000),
        [lmove5]"f"(0x5)
      : "memory"
  );
}

// Convert 4 I422 pixels per iteration to 8 bytes of RGB565 (5-6-5 bits).
// Assumes width is a positive multiple of 4.
void I422ToRGB565Row_MMI(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb565,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[u_ptr])               \n\t"
      "gslwrc1    %[u],            0x00(%[u_ptr])               \n\t"
      "gslwlc1    %[v],            0x03(%[v_ptr])               \n\t"
      "gslwrc1    %[v],            0x00(%[v_ptr])               \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      // u3|u2|u1|u0 --> u1|u1|u0|u0
      "punpcklbh  %[u],            %[u],           %[u]         \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      // v3|v2|v1|v0 --> v1|v1|v0|v0
      "punpcklbh  %[v],            %[v],           %[v]         \n\t"
      "punpcklbh  %[v],            %[v],           %[zero]      \n\t"
      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      // 565 pack. Shift amounts that are not available as immediates are
      // synthesized with paddb (e.g. 3+6=9, 3+8=11); green keeps 6 bits via
      // the mask1>>8 byte mask.
      "psrlh      %[temp],         %[g_vec],       %[three]     \n\t"
      "and        %[g_vec],        %[temp],        %[mask2]     \n\t"
      "psrlw      %[temp],         %[temp],        %[seven]     \n\t"
      "psrlw      %[r_vec],        %[mask1],       %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[r_vec]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"
      "paddb      %[r_vec],        %[three],       %[six]       \n\t"
      "psrlw      %[temp],         %[temp],        %[r_vec]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "paddb      %[temp],         %[three],       %[eight]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"

      "psrlh      %[temp],         %[b_vec],       %[three]     \n\t"
      "and        %[b_vec],        %[temp],        %[mask2]     \n\t"
      "psrlw      %[temp],         %[temp],        %[seven]     \n\t"
      "psrlw      %[r_vec],        %[mask1],       %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[r_vec]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[lmove5]    \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"
      "paddb      %[r_vec],        %[three],       %[six]       \n\t"
      "psrlw      %[temp],         %[temp],        %[r_vec]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "paddb      %[temp],         %[three],       %[eight]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "punpcklhw  %[r_vec],        %[g_vec],       %[b_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[g_vec],       %[b_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[r_vec],       %[b_vec]     \n\t"

      "gssdlc1    %[g_vec],        0x07(%[dst_rgb565])          \n\t"
      "gssdrc1    %[g_vec],        0x00(%[dst_rgb565])          \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[u_ptr],        %[u_ptr],       0x02         \n\t"
      "daddiu     %[v_ptr],        %[v_ptr],       0x02         \n\t"
      "daddiu     %[dst_rgb565],   %[dst_rgb565],  0x08         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [u_ptr]"r"(src_u),
        [v_ptr]"r"(src_v),         [dst_rgb565]"r"(dst_rgb565),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [three]"f"(0x3),           [mask2]"f"(0x1f0000001f),
        [eight]"f"(0x8),           [seven]"f"(0x7),
        [lmove5]"f"(0x5)
      : "memory"
  );
}

// Convert 4 NV12 pixels (4 Y bytes + 2 interleaved UV pairs) per iteration
// to 16 bytes of ARGB. U/V are split from the interleaved plane with pshufh
// (ushu=0xA0 selects U lanes, vshu=0xf5 selects V lanes). Assumes width is a
// positive multiple of 4.
void NV12ToARGBRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[uv_ptr])              \n\t"
      "gslwrc1    %[u],            0x00(%[uv_ptr])              \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "pshufh     %[v],            %[u],           %[vshu]      \n\t"
      "pshufh     %[u],            %[u],           %[ushu]      \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      // Interleave B,G,R with alpha=0xff and store 4 ARGB pixels.
      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklwd  %[g_vec],        %[g_vec],       %[alpha]     \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])          \n\t"
      "gssdlc1    %[b_vec],        0x0f(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[b_vec],        0x08(%[rgbbuf_ptr])          \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[uv_ptr],       %[uv_ptr],      0x04         \n\t"
      "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],  0x10         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [uv_ptr]"r"(src_uv),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0),           [vshu]"f"(0xf5),
        [alpha]"f"(-1)
      : "memory"
  );
}

// NV21 variant of NV12ToARGBRow_MMI: the chroma plane is V,U interleaved, so
// the two pshufh selectors are swapped relative to NV12. Assumes width is a
// positive multiple of 4.
void NV21ToARGBRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_vu,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[vu_ptr])              \n\t"
      "gslwrc1    %[u],            0x00(%[vu_ptr])              \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      // VU order: ushu picks the V lanes, vshu picks the U lanes here.
      "pshufh     %[v],            %[u],           %[ushu]      \n\t"
      "pshufh     %[u],            %[u],           %[vshu]      \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklwd  %[g_vec],        %[g_vec],       %[alpha]     \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])          \n\t"
      "gssdlc1    %[b_vec],        0x0f(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[b_vec],        0x08(%[rgbbuf_ptr])          \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[vu_ptr],       %[vu_ptr],      0x04         \n\t"
      "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],  0x10         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [vu_ptr]"r"(src_vu),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0),           [vshu]"f"(0xf5),
        [alpha]"f"(-1)
      : "memory"
  );
}

// Convert 4 NV12 pixels per iteration to 12 bytes of packed RGB24.
// Assumes width is a positive multiple of 4.
void NV12ToRGB24Row_MMI(const uint8_t* src_y,
                        const uint8_t* src_uv,
                        uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[uv_ptr])              \n\t"
      "gslwrc1    %[u],            0x00(%[uv_ptr])              \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "pshufh     %[v],            %[u],           %[vshu]      \n\t"
      "pshufh     %[u],            %[u],           %[ushu]      \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      // Squeeze the 4-byte pixels into a 12-byte RGB24 run
      // (8-byte + 4-byte stores).
      "punpckhwd  %[r_vec],        %[g_vec],       %[g_vec]     \n\t"
      "psllw      %[temp],         %[r_vec],       %[lmove1]    \n\t"
      "or         %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrlw      %[temp],         %[r_vec],       %[rmove1]    \n\t"
      "pextrh     %[temp],         %[temp],        %[zero]      \n\t"
      "pinsrh_2   %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pextrh     %[temp],         %[b_vec],       %[zero]      \n\t"
      "pinsrh_3   %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pextrh     %[temp],         %[b_vec],       %[one]       \n\t"
      "punpckhwd  %[b_vec],        %[b_vec],       %[b_vec]     \n\t"
      "psllw      %[b_vec],        %[b_vec],       %[rmove1]    \n\t"
      "or         %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])          \n\t"
      "gsswlc1    %[b_vec],        0x0b(%[rgbbuf_ptr])          \n\t"
      "gsswrc1    %[b_vec],        0x08(%[rgbbuf_ptr])          \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[uv_ptr],       %[uv_ptr],      0x04         \n\t"
      "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],  0x0C         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [uv_ptr]"r"(src_uv),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0),           [vshu]"f"(0xf5),
        [alpha]"f"(-1),            [lmove1]"f"(0x18),
        [one]"f"(0x1),             [rmove1]"f"(0x8)
      : "memory"
  );
}

// NV21 variant of NV12ToRGB24Row_MMI (chroma plane is V,U interleaved, so
// the pshufh selectors are swapped). Assumes width is a positive multiple
// of 4.
void NV21ToRGB24Row_MMI(const uint8_t* src_y,
                        const uint8_t* src_vu,
                        uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[vu_ptr])              \n\t"
      "gslwrc1    %[u],            0x00(%[vu_ptr])              \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      // VU order: selectors swapped relative to NV12.
      "pshufh     %[v],            %[u],           %[ushu]      \n\t"
      "pshufh     %[u],            %[u],           %[vshu]      \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "punpckhwd  %[r_vec],        %[g_vec],       %[g_vec]     \n\t"
      "psllw      %[temp],         %[r_vec],       %[lmove1]    \n\t"
      "or         %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrlw      %[temp],         %[r_vec],       %[rmove1]    \n\t"
      "pextrh     %[temp],         %[temp],        %[zero]      \n\t"
      "pinsrh_2   %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pextrh     %[temp],         %[b_vec],       %[zero]      \n\t"
      "pinsrh_3   %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pextrh     %[temp],         %[b_vec],       %[one]       \n\t"
      "punpckhwd  %[b_vec],        %[b_vec],       %[b_vec]     \n\t"
      "psllw      %[b_vec],        %[b_vec],       %[rmove1]    \n\t"
      "or         %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "gssdlc1    %[g_vec],        0x07(%[rgbbuf_ptr])          \n\t"
      "gssdrc1    %[g_vec],        0x00(%[rgbbuf_ptr])          \n\t"
      "gsswlc1    %[b_vec],        0x0b(%[rgbbuf_ptr])          \n\t"
      "gsswrc1    %[b_vec],        0x08(%[rgbbuf_ptr])          \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[vu_ptr],       %[vu_ptr],      0x04         \n\t"
      "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],  0x0C         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [vu_ptr]"r"(src_vu),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0),           [vshu]"f"(0xf5),
        [lmove1]"f"(0x18),         [rmove1]"f"(0x8),
        [one]"f"(0x1)
      : "memory"
  );
}

// Convert 4 NV12 pixels per iteration to 8 bytes of RGB565. The 5-bit shift
// count is synthesized as 8-3 with psubb (clobbers %[y] as a scratch).
// Assumes width is a positive multiple of 4.
void NV12ToRGB565Row_MMI(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb565,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[ug],           %[ug],          %[zero]      \n\t"
      "pshufh     %[ug],           %[ug],          %[zero]      \n\t"
      "ldc1       %[vg],           0x20(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vg],           %[vg],          %[zero]      \n\t"
      "pshufh     %[vg],           %[vg],          %[five]      \n\t"
      "ldc1       %[br],           0xa0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])         \n\t"
      "punpcklbh  %[vr],           %[vr],          %[zero]      \n\t"
      "pshufh     %[vr],           %[vr],          %[five]      \n\t"
      "or         %[vr],           %[vr],          %[mask1]     \n\t"

      "1:                                                       \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])               \n\t"
      "gslwrc1    %[y],            0x00(%[y_ptr])               \n\t"
      "gslwlc1    %[u],            0x03(%[uv_ptr])              \n\t"
      "gslwrc1    %[u],            0x00(%[uv_ptr])              \n\t"
      "punpcklbh  %[u],            %[u],           %[zero]      \n\t"
      "pshufh     %[v],            %[u],           %[vshu]      \n\t"
      "pshufh     %[u],            %[u],           %[ushu]      \n\t"

      "punpcklbh  %[y],            %[y],           %[y]         \n\t"
      "pmulhuh    %[y],            %[y],           %[yg]        \n\t"

      "paddsh     %[b_vec],        %[y],           %[bb]        \n\t"
      "pmullh     %[temp],         %[u],           %[ub]        \n\t"
      "psubsh     %[b_vec],        %[b_vec],       %[temp]      \n\t"
      "psrah      %[b_vec],        %[b_vec],       %[six]       \n\t"

      "paddsh     %[g_vec],        %[y],           %[bg]        \n\t"
      "pmullh     %[temp],         %[u],           %[ug]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "pmullh     %[temp],         %[v],           %[vg]        \n\t"
      "psubsh     %[g_vec],        %[g_vec],       %[temp]      \n\t"
      "psrah      %[g_vec],        %[g_vec],       %[six]       \n\t"

      "paddsh     %[r_vec],        %[y],           %[br]        \n\t"
      "pmullh     %[temp],         %[v],           %[vr]        \n\t"
      "psubsh     %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "psrah      %[r_vec],        %[r_vec],       %[six]       \n\t"

      "packushb   %[r_vec],        %[b_vec],       %[r_vec]     \n\t"
      "packushb   %[g_vec],        %[g_vec],       %[zero]      \n\t"
      "punpcklbh  %[b_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpckhbh  %[r_vec],        %[r_vec],       %[g_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[b_vec],       %[r_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "psrlh      %[temp],         %[g_vec],       %[three]     \n\t"
      "and        %[g_vec],        %[temp],        %[mask2]     \n\t"
      "psrlw      %[temp],         %[temp],        %[seven]     \n\t"
      "psrlw      %[r_vec],        %[mask1],       %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[r_vec]     \n\t"
      "psubb      %[y],            %[eight],       %[three]     \n\t"  // 5
      "psllw      %[r_vec],        %[r_vec],       %[y]         \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"
      "paddb      %[r_vec],        %[three],       %[six]       \n\t"
      "psrlw      %[temp],         %[temp],        %[r_vec]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "paddb      %[temp],         %[three],       %[eight]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "or         %[g_vec],        %[g_vec],       %[r_vec]     \n\t"

      "psrlh      %[temp],         %[b_vec],       %[three]     \n\t"
      "and        %[b_vec],        %[temp],        %[mask2]     \n\t"
      "psrlw      %[temp],         %[temp],        %[seven]     \n\t"
      "psrlw      %[r_vec],        %[mask1],       %[eight]     \n\t"
      "and        %[r_vec],        %[temp],        %[r_vec]     \n\t"
      "psubb      %[y],            %[eight],       %[three]     \n\t"  // 5
      "psllw      %[r_vec],        %[r_vec],       %[y]         \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"
      "paddb      %[r_vec],        %[three],       %[six]       \n\t"
      "psrlw      %[temp],         %[temp],        %[r_vec]     \n\t"
      "and        %[r_vec],        %[temp],        %[mask2]     \n\t"
      "paddb      %[temp],         %[three],       %[eight]     \n\t"
      "psllw      %[r_vec],        %[r_vec],       %[temp]      \n\t"
      "or         %[b_vec],        %[b_vec],       %[r_vec]     \n\t"

      "punpcklhw  %[r_vec],        %[g_vec],       %[b_vec]     \n\t"
      "punpckhhw  %[b_vec],        %[g_vec],       %[b_vec]     \n\t"
      "punpcklhw  %[g_vec],        %[r_vec],       %[b_vec]     \n\t"

      "gssdlc1    %[g_vec],        0x07(%[dst_rgb565])          \n\t"
      "gssdrc1    %[g_vec],        0x00(%[dst_rgb565])          \n\t"

      "daddiu     %[y_ptr],        %[y_ptr],       0x04         \n\t"
      "daddiu     %[uv_ptr],       %[uv_ptr],      0x04         \n\t"
      "daddiu     %[dst_rgb565],   %[dst_rgb565],  0x08         \n\t"
      "daddi      %[width],        %[width],       -0x04        \n\t"
      "bnez       %[width],        1b                           \n\t"

      : [y]"=&f"(y),               [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec),       [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec),       [temp]"=&f"(temp),
        [ub]"=&f"(ub),             [ug]"=&f"(ug),
        [vg]"=&f"(vg),             [vr]"=&f"(vr),
        [bb]"=&f"(bb),             [bg]"=&f"(bg),
        [br]"=&f"(br),             [yg]"=&f"(yg)
      : [y_ptr]"r"(src_y),         [uv_ptr]"r"(src_uv),
        [dst_rgb565]"r"(dst_rgb565),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00),           [five]"f"(0x55),
        [six]"f"(0x6),             [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0),           [vshu]"f"(0xf5),
        [three]"f"(0x3),           [mask2]"f"(0x1f0000001f),
        [eight]"f"(0x8),           [seven]"f"(0x7)
      : "memory"
  );
}

// Convert 4 YUY2 (Y0 U Y1 V) pixels per iteration to 16 bytes of ARGB.
void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])         \n\t"
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])         \n\t"
      "or         %[ub],           %[ub],          %[mask1]     \n\t"
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])         \n\t"
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])         \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t" - "psrlh %[temp], %[y], %[eight] \n\t" - "pshufh %[u], %[temp], %[ushu] \n\t" - "pshufh %[v], %[temp], %[vshu] \n\t" - - "psrlh %[temp], %[mask1], %[eight] \n\t" - "and %[y], %[y], %[temp] \n\t" - "psllh %[temp], %[y], %[eight] \n\t" - "or %[y], %[y], %[temp] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi 
%[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [eight]"f"(0x8) - : "memory" - ); -} - -void UYVYToARGBRow_MMI(const uint8_t* src_uyvy, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t" - "psrlh %[temp], %[mask1], %[eight] \n\t" - "and %[temp], %[y], %[temp] \n\t" - "pshufh %[u], %[temp], %[ushu] \n\t" - "pshufh %[v], %[temp], %[vshu] \n\t" - - "psrlh %[y], %[y], %[eight] \n\t" - "psllh %[temp], %[y], %[eight] \n\t" - "or %[y], %[y], %[temp] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], 
%[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [eight]"f"(0x8) - : "memory" - ); -} - -void I422ToRGBARow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 
%[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t" - "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t" - "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] 
\n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [alpha]"f"(-1) - : "memory" - ); -} - -void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) { - __asm__ volatile ( - "punpcklwd %[v32], %[v32], %[v32] \n\t" - "1: \n\t" - "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t" - - "daddi %[width], %[width], -0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "bnez %[width], 1b \n\t" - : [v32]"+&f"(v32) - : [dst_ptr]"r"(dst_argb), [width]"r"(width) - : "memory" - ); -} -// clang-format on - -// 10 bit YUV to ARGB -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/files/source/scale_mmi.cc b/files/source/scale_mmi.cc deleted file mode 100644 index 1226ef3e..00000000 --- a/files/source/scale_mmi.cc +++ /dev/null @@ -1,1168 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/scale.h" - -#include <assert.h> -#include <string.h> - -#include "libyuv/cpu_id.h" -#include "libyuv/planar_functions.h" // For CopyARGB -#include "libyuv/row.h" -#include "libyuv/scale_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -// clang-format off - -// CPU agnostic row functions -void ScaleRowDown2_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1, dest; - const uint64_t shift = 0x8ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "psrlh %[src0], %[src0], %[shift] \n\t" - - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "psrlh %[src1], %[src1], %[shift] \n\t" - - "packushb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift] "f"(shift) - : "memory"); -} - -void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest0, dest1; - - const uint64_t mask = 0x00ff00ff00ff00ffULL; - const uint64_t shift = 0x8ULL; - 
- __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "and %[dest0], %[src0], %[mask] \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "and %[dest1], %[src1], %[mask] \n\t" - "packushb %[dest0], %[dest0], %[dest1] \n\t" - - "psrlh %[src0], %[src0], %[shift] \n\t" - "psrlh %[src1], %[src1], %[shift] \n\t" - "packushb %[dest1], %[src0], %[src1] \n\t" - - "pavgb %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [mask] "f"(mask), - [shift] "f"(shift), [width] "r"(dst_width) - : "memory"); -} - -void ScaleRowDown2Box_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - const uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + src_stride; - - uint64_t s0, s1, t0, t1; - uint64_t dest, dest0, dest1; - - const uint64_t ph = 0x0002000200020002ULL; - const uint64_t mask = 0x00ff00ff00ff00ffULL; - const uint64_t shift0 = 0x2ULL; - const uint64_t shift1 = 0x8ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[s0], 0x00(%[s]) \n\t" - "gsldlc1 %[s0], 0x07(%[s]) \n\t" - "psrlh %[s1], %[s0], %[shift1] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x00(%[t]) \n\t" - "gsldlc1 %[t0], 0x07(%[t]) \n\t" - "psrlh %[t1], %[t0], %[shift1] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddh %[dest0], %[s0], %[s1] \n\t" - "paddh %[dest0], %[dest0], %[t0] \n\t" - "paddh %[dest0], %[dest0], %[t1] \n\t" - "paddh %[dest0], %[dest0], %[ph] \n\t" - "psrlh %[dest0], %[dest0], %[shift0] \n\t" - - "gsldrc1 %[s0], 0x08(%[s]) \n\t" - "gsldlc1 %[s0], 0x0f(%[s]) 
\n\t" - "psrlh %[s1], %[s0], %[shift1] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x08(%[t]) \n\t" - "gsldlc1 %[t0], 0x0f(%[t]) \n\t" - "psrlh %[t1], %[t0], %[shift1] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddh %[dest1], %[s0], %[s1] \n\t" - "paddh %[dest1], %[dest1], %[t0] \n\t" - "paddh %[dest1], %[dest1], %[t1] \n\t" - "paddh %[dest1], %[dest1], %[ph] \n\t" - "psrlh %[dest1], %[dest1], %[shift0] \n\t" - - "packushb %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[s], %[s], 0x10 \n\t" - "daddiu %[t], %[t], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest] "=&f"(dest) - : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph), - [mask] "f"(mask) - : "memory"); -} - -void ScaleARGBRowDown2_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - (void)src_stride; - - const uint32_t* src = (const uint32_t*)(src_argb); - uint32_t* dst = (uint32_t*)(dst_argb); - - uint64_t src0, src1, dest; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "punpckhwd %[dest], %[src0], %[src1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width) - : "memory"); -} - -void 
ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - __asm__ volatile( - "1: \n\t" - "lwc1 %[src0], 0x00(%[src_ptr]) \n\t" - "lwc1 %[src1], 0x08(%[src_ptr]) \n\t" - "punpcklwd %[dest_lo], %[src0], %[src1] \n\t" - "lwc1 %[src0], 0x04(%[src_ptr]) \n\t" - "lwc1 %[src1], 0x0c(%[src_ptr]) \n\t" - "punpcklwd %[dest_hi], %[src0], %[src1] \n\t" - - "pavgb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width) - : "memory"); -} - -void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - const uint8_t* s = src_argb; - const uint8_t* t = src_argb + src_stride; - - uint64_t s0, s_hi, s_lo; - uint64_t t0, t_hi, t_lo; - uint64_t dest, dest_hi, dest_lo; - - const uint64_t mask = 0x0ULL; - const uint64_t ph = 0x0002000200020002ULL; - const uint64_t shfit = 0x2ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[s0], 0x00(%[s]) \n\t" - "gsldlc1 %[s0], 0x07(%[s]) \n\t" - "punpcklbh %[s_lo], %[s0], %[mask] \n\t" - "punpckhbh %[s_hi], %[s0], %[mask] \n\t" - "paddh %[dest_lo], %[s_lo], %[s_hi] \n\t" - - "gsldrc1 %[t0], 0x00(%[t]) \n\t" - "gsldlc1 %[t0], 0x07(%[t]) \n\t" - "punpcklbh %[t_lo], %[t0], %[mask] \n\t" - "punpckhbh %[t_hi], %[t0], %[mask] \n\t" - "paddh %[dest_lo], %[dest_lo], %[t_lo] \n\t" - "paddh %[dest_lo], %[dest_lo], %[t_hi] \n\t" - - "paddh %[dest_lo], %[dest_lo], %[ph] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shfit] \n\t" - - "gsldrc1 %[s0], 
0x08(%[s]) \n\t" - "gsldlc1 %[s0], 0x0f(%[s]) \n\t" - "punpcklbh %[s_lo], %[s0], %[mask] \n\t" - "punpckhbh %[s_hi], %[s0], %[mask] \n\t" - "paddh %[dest_hi], %[s_lo], %[s_hi] \n\t" - - "gsldrc1 %[t0], 0x08(%[t]) \n\t" - "gsldlc1 %[t0], 0x0f(%[t]) \n\t" - "punpcklbh %[t_lo], %[t0], %[mask] \n\t" - "punpckhbh %[t_hi], %[t0], %[mask] \n\t" - "paddh %[dest_hi], %[dest_hi], %[t_lo] \n\t" - "paddh %[dest_hi], %[dest_hi], %[t_hi] \n\t" - - "paddh %[dest_hi], %[dest_hi], %[ph] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shfit] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[s], %[s], 0x10 \n\t" - "daddiu %[t], %[t], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [s0] "=&f"(s0), [t0] "=&f"(t0), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), - [t_hi] "=&f"(t_hi), [t_lo] "=&f"(t_lo), [dest] "=&f"(dest) - : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width), - [mask] "f"(mask), [ph] "f"(ph), [shfit] "f"(shfit) - : "memory"); -} - -void ScaleRowDown2_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1, dest; - const uint64_t shift = 0x10ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "psrlw %[src0], %[src0], %[shift] \n\t" - - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "psrlw %[src1], %[src1], %[shift] \n\t" - - "packsswh %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), 
[src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift] "f"(shift) - : "memory"); -} - -void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "punpcklhw %[dest_lo], %[src0], %[src1] \n\t" - "punpckhhw %[dest_hi], %[src0], %[src1] \n\t" - - "punpcklhw %[src0], %[dest_lo], %[dest_hi] \n\t" - "punpckhhw %[src1], %[dest_lo], %[dest_hi] \n\t" - - "pavgh %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width) - : "memory"); -} - -void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* s = src_ptr; - const uint16_t* t = src_ptr + src_stride; - - uint64_t s0, s1, s_hi, s_lo; - uint64_t t0, t1, t_hi, t_lo; - uint64_t dest, dest0, dest1; - - const uint64_t ph = 0x0000000200000002ULL; - const uint64_t mask = 0x0000ffff0000ffffULL; - const uint64_t shift0 = 0x10ULL; - const uint64_t shift1 = 0x2ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[s0], 0x00(%[s]) \n\t" - "gsldlc1 %[s0], 0x07(%[s]) \n\t" - "psrlw %[s1], %[s0], %[shift0] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x00(%[t]) \n\t" - "gsldlc1 %[t0], 0x07(%[t]) \n\t" - "psrlw %[t1], %[t0], %[shift0] \n\t" - "and 
%[t0], %[t0], %[mask] \n\t" - - "paddw %[dest0], %[s0], %[s1] \n\t" - "paddw %[dest0], %[dest0], %[t0] \n\t" - "paddw %[dest0], %[dest0], %[t1] \n\t" - "paddw %[dest0], %[dest0], %[ph] \n\t" - "psrlw %[dest0], %[dest0], %[shift1] \n\t" - - "gsldrc1 %[s0], 0x08(%[s]) \n\t" - "gsldlc1 %[s0], 0x0f(%[s]) \n\t" - "psrlw %[s1], %[s0], %[shift0] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x08(%[t]) \n\t" - "gsldlc1 %[t0], 0x0f(%[t]) \n\t" - "psrlw %[t1], %[t0], %[shift0] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddw %[dest1], %[s0], %[s1] \n\t" - "paddw %[dest1], %[dest1], %[t0] \n\t" - "paddw %[dest1], %[dest1], %[t1] \n\t" - "paddw %[dest1], %[dest1], %[ph] \n\t" - "psrlw %[dest1], %[dest1], %[shift1] \n\t" - - "packsswh %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[s], %[s], 0x10 \n\t" - "daddiu %[t], %[t], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1), - [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), [t_hi] "=&f"(t_hi), - [t_lo] "=&f"(t_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), - [dest] "=&f"(dest) - : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph), - [mask] "f"(mask) - : "memory"); -} - -void ScaleRowDown4_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - const uint64_t shift = 0x10ULL; - const uint64_t mask = 0x000000ff000000ffULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "psrlw %[src0], %[src0], %[shift] \n\t" - "and %[src0], %[src0], %[mask] \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - 
"psrlw %[src1], %[src1], %[shift] \n\t" - "and %[src1], %[src1], %[mask] \n\t" - "packsswh %[dest_lo], %[src0], %[src1] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t" - "psrlw %[src0], %[src0], %[shift] \n\t" - "and %[src0], %[src0], %[mask] \n\t" - "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t" - "psrlw %[src1], %[src1], %[shift] \n\t" - "and %[src1], %[src1], %[mask] \n\t" - "packsswh %[dest_hi], %[src0], %[src1] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift] "f"(shift), [mask] "f"(mask) - : "memory"); -} - -void ScaleRowDown4_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "punpckhhw %[dest_lo], %[src0], %[src1] \n\t" - "punpcklhw %[dest_lo], %[dest_lo], %[mask] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t" - "punpckhhw %[dest_hi], %[src0], %[src1] \n\t" - "punpcklhw %[dest_hi], %[dest_hi], %[mask] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - 
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [mask] "f"(mask) - : "memory"); -} - -#define DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \ - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \ - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - -#define DO_SCALEROWDOWN4BOX_LOOP(reg) \ - "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \ - "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \ - \ - "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - \ - "pmaddhw %[dest_lo], %[dest_lo], %[mask1] \n\t" \ - "pmaddhw %[dest_hi], %[dest_hi], %[mask1] \n\t" \ - "packsswh " #reg ", %[dest_lo], %[dest_hi] \n\t" \ - "pmaddhw " #reg ", " #reg ", %[mask1] \n\t" \ - "paddh " #reg ", " #reg ", %[ph] \n\t" \ - "psrlh " #reg ", " #reg ", %[shift] \n\t" \ - \ - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \ - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \ - "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \ - "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t" - -/* LibYUVScaleTest.ScaleDownBy4_Box */ -void ScaleRowDown4Box_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - const uint8_t* src0_ptr = src_ptr; - const uint8_t* src1_ptr = src_ptr + src_stride; - const uint8_t* src2_ptr = src_ptr + src_stride * 2; - const uint8_t* src3_ptr = src_ptr + src_stride * 3; - - uint64_t src, src_hi, src_lo; - uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3; - 
- const uint64_t mask0 = 0x0ULL; - const uint64_t mask1 = 0x0001000100010001ULL; - const uint64_t ph = 0x0008000800080008ULL; - const uint64_t shift = 0x4ULL; - - __asm__ volatile( - "1: \n\t" - - DO_SCALEROWDOWN4BOX_LOOP(%[dest0]) - DO_SCALEROWDOWN4BOX_LOOP(%[dest1]) - DO_SCALEROWDOWN4BOX_LOOP(%[dest2]) - DO_SCALEROWDOWN4BOX_LOOP(%[dest3]) - - "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" - "packsswh %[dest_hi], %[dest2], %[dest3] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest) - : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), - [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst), - [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0), - [ph] "f"(ph), [mask1] "f"(mask1) - : "memory"); -} - -#define DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \ - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \ - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - -#define DO_SCALEROWDOWN4BOX_16_LOOP(reg) \ - "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \ - "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \ - \ - "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - \ - "paddw %[dest], %[dest_lo], %[dest_hi] \n\t" \ - "punpckhwd %[dest_hi], %[dest], %[dest] \n\t" \ - "paddw %[dest], 
%[dest_hi], %[dest] \n\t" \ - "paddw %[dest], %[dest], %[ph] \n\t" \ - "psraw %[dest], %[dest], %[shift] \n\t" \ - "and " #reg ", %[dest], %[mask1] \n\t" \ - \ - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \ - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \ - "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \ - "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t" - -/* LibYUVScaleTest.ScaleDownBy4_Box_16 */ -void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* src0_ptr = src_ptr; - const uint16_t* src1_ptr = src_ptr + src_stride; - const uint16_t* src2_ptr = src_ptr + src_stride * 2; - const uint16_t* src3_ptr = src_ptr + src_stride * 3; - - uint64_t src, src_hi, src_lo; - uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3; - - const uint64_t mask0 = 0x0ULL; - const uint64_t mask1 = 0x00000000ffffffffULL; - const uint64_t ph = 0x0000000800000008ULL; - const uint64_t shift = 0x04ULL; - - __asm__ volatile( - "1: \n\t" - - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest0]) - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest1]) - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest2]) - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest3]) - "punpcklwd %[dest_lo], %[dest0], %[dest1] \n\t" - "punpcklwd %[dest_hi], %[dest2], %[dest3] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest) - : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), - [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst), - [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0), - [ph] "f"(ph), [mask1] "f"(mask1) - : 
"memory"); -} - -// Scales a single row of pixels up by 2x using point sampling. -void ScaleColsUp2_MMI(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx) { - uint64_t src, dest; - - (void)x; - (void)dx; - - __asm__ volatile( - "1: \n\t" - "lwc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[dest], %[src], %[src] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width) - : "memory"); -} - -void ScaleColsUp2_16_MMI(uint16_t* dst_ptr, - const uint16_t* src_ptr, - int dst_width, - int x, - int dx) { - uint64_t src, dest; - - (void)x; - (void)dx; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - - "punpcklhw %[dest], %[src], %[src] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "punpckhhw %[dest], %[src], %[src] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width) - : "memory"); -} - -void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { - uint64_t src, src_hi, src_lo, dest0, dest1; - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[mask] \n\t" - - "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gsldlc1 
%[dest0], 0x07(%[dst_ptr]) \n\t" - "paddush %[dest0], %[dest0], %[src_lo] \n\t" - "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "paddush %[dest1], %[dest1], %[src_hi] \n\t" - - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [src] "=&f"(src) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width), - [mask] "f"(mask) - : "memory"); -} - -void ScaleAddRow_16_MMI(const uint16_t* src_ptr, - uint32_t* dst_ptr, - int src_width) { - uint64_t src, src_hi, src_lo, dest0, dest1; - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "punpcklhw %[src_lo], %[src], %[mask] \n\t" - "punpckhhw %[src_hi], %[src], %[mask] \n\t" - - "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "paddw %[dest0], %[dest0], %[src_lo] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - - "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "paddw %[dest1], %[dest1], %[src_hi] \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [src] "=&f"(src) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width), - [mask] "f"(mask) - : "memory"); -} - 
-void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_argb, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1, dest; - - __asm__ volatile( - "1: \n\t" - "lwc1 %[src0], 0x00(%[src_ptr]) \n\t" - "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t" - "lwc1 %[src1], 0x00(%[src_ptr]) \n\t" - "punpcklwd %[dest], %[src0], %[src1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), - [src_stepx_4] "r"(src_stepx << 2), [width] "r"(dst_width) - : "memory"); -} - -void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_argb, - int dst_width) { - const uint8_t* src0_ptr = src_argb; - const uint8_t* src1_ptr = src_argb + src_stride; - - uint64_t src0, src1, src_hi, src_lo; - uint64_t dest, dest_hi, dest_lo, dest0, dest1; - - const uint64_t mask = 0x0ULL; - const uint64_t ph = 0x0002000200020002ULL; - const uint64_t shift = 0x2ULL; - - __asm__ volatile( - "1: \n\t" - - "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[dest_lo], %[src0], %[mask] \n\t" - "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t" - "punpcklbh %[dest_hi], %[src0], %[mask] \n\t" - - "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src_lo], %[src1], %[mask] \n\t" - "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t" - "punpcklbh %[src_hi], %[src1], %[mask] \n\t" - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - "paddh %[dest0], %[dest_hi], %[dest_lo] \n\t" - "paddh %[dest0], %[dest0], %[ph] \n\t" - "psrlh %[dest0], %[dest0], %[shift] \n\t" - - "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t" - "dadd %[src1_ptr], %[src1_ptr], 
%[src_stepx_4] \n\t" - - "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[dest_lo], %[src0], %[mask] \n\t" - "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t" - "punpcklbh %[dest_hi], %[src0], %[mask] \n\t" - - "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src_lo], %[src1], %[mask] \n\t" - "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t" - "punpcklbh %[src_hi], %[src1], %[mask] \n\t" - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - "paddh %[dest1], %[dest_hi], %[dest_lo] \n\t" - "paddh %[dest1], %[dest1], %[ph] \n\t" - "psrlh %[dest1], %[dest1], %[shift] \n\t" - - "packushb %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t" - "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0), - [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), - [dst_ptr] "r"(dst_argb), [width] "r"(dst_width), - [src_stepx_4] "r"(src_stepx << 2), [shift] "f"(shift), [mask] "f"(mask), - [ph] "f"(ph) - : "memory"); -} - -// Scales a single row of pixels using point sampling. 
-void ScaleARGBCols_MMI(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx) { - const uint32_t* src = (const uint32_t*)(src_argb); - uint32_t* dst = (uint32_t*)(dst_argb); - - const uint32_t* src_tmp; - - uint64_t dest, offset; - - const uint64_t shift0 = 16; - const uint64_t shift1 = 2; - - __asm__ volatile( - "1: \n\t" - "srav %[offset], %[x], %[shift0] \n\t" - "sllv %[offset], %[offset], %[shift1] \n\t" - "dadd %[src_tmp], %[src_ptr], %[offset] \n\t" - "lwc1 %[dest], 0x00(%[src_tmp]) \n\t" - "swc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "dadd %[x], %[x], %[dx] \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x04 \n\t" - "daddi %[width], %[width], -0x01 \n\t" - "bnez %[width], 1b \n\t" - : [dest] "=&f"(dest), [offset] "=&r"(offset), [src_tmp] "=&r"(src_tmp) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [dx] "r"(dx), [x] "r"(x), [shift0] "r"(shift0), [shift1] "r"(shift1) - : "memory"); -} - -// Scales a single row of pixels up by 2x using point sampling. -void ScaleARGBColsUp2_MMI(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx) { - uint64_t src, dest0, dest1; - (void)x; - (void)dx; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "punpcklwd %[dest0], %[src], %[src] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "punpckhwd %[dest1], %[src], %[src] \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src] "=&f"(src) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width) - : "memory"); -} - -// Divide num by div and return as 16.16 fixed point result. 
-/* LibYUVBaseTest.TestFixedDiv */ -int FixedDiv_MIPS(int num, int div) { - int quotient = 0; - const int shift = 16; - - asm( - "dsll %[num], %[num], %[shift] \n\t" - "ddiv %[num], %[div] \t\n" - "mflo %[quo] \t\n" - : [quo] "+&r"(quotient) - : [num] "r"(num), [div] "r"(div), [shift] "r"(shift)); - - return quotient; -} - -// Divide num by div and return as 16.16 fixed point result. -/* LibYUVScaleTest.ARGBScaleTo320x240_Linear */ -int FixedDiv1_MIPS(int num, int div) { - int quotient = 0; - const int shift = 16; - const int val1 = 1; - const int64_t val11 = 0x00010001ULL; - - asm( - "dsll %[num], %[num], %[shift] \n\t" - "dsub %[num], %[num], %[val11] \n\t" - "dsub %[div], %[div], %[val1] \n\t" - "ddiv %[num], %[div] \t\n" - "mflo %[quo] \t\n" - : [quo] "+&r"(quotient) - : [num] "r"(num), [div] "r"(div), [val1] "r"(val1), [val11] "r"(val11), - [shift] "r"(shift)); - - return quotient; -} - -// Read 8x2 upsample with filtering and write 16x1. -// actually reads an extra pixel, so 9x2. -void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* src2_ptr = src_ptr + src_stride; - - uint64_t src0, src1; - uint64_t dest, dest04, dest15, dest26, dest37; - uint64_t tmp0, tmp1, tmp2, tmp3; - - const uint64_t mask0 = 0x0003000900030009ULL; - const uint64_t mask1 = 0x0001000300010003ULL; - const uint64_t mask2 = 0x0009000300090003ULL; - const uint64_t mask3 = 0x0003000100030001ULL; - const uint64_t ph = 0x0000000800000008ULL; - const uint64_t shift = 4; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src1_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src1_ptr]) \n\t" - "pmaddhw %[dest04], %[src0], %[mask0] \n\t" - "gsldrc1 %[src1], 0x00(%[src2_ptr]) \n\t" - "gsldlc1 %[src1], 0x07(%[src2_ptr]) \n\t" - "pmaddhw %[dest], %[src1], %[mask1] \n\t" - "paddw %[dest04], %[dest04], %[dest] \n\t" - "paddw %[dest04], %[dest04], %[ph] \n\t" - "psrlw %[dest04], %[dest04], %[shift] \n\t" - - "pmaddhw %[dest15], 
%[src0], %[mask2] \n\t" - "pmaddhw %[dest], %[src1], %[mask3] \n\t" - "paddw %[dest15], %[dest15], %[dest] \n\t" - "paddw %[dest15], %[dest15], %[ph] \n\t" - "psrlw %[dest15], %[dest15], %[shift] \n\t" - - "gsldrc1 %[src0], 0x02(%[src1_ptr]) \n\t" - "gsldlc1 %[src0], 0x09(%[src1_ptr]) \n\t" - "pmaddhw %[dest26], %[src0], %[mask0] \n\t" - "gsldrc1 %[src1], 0x02(%[src2_ptr]) \n\t" - "gsldlc1 %[src1], 0x09(%[src2_ptr]) \n\t" - "pmaddhw %[dest], %[src1], %[mask1] \n\t" - "paddw %[dest26], %[dest26], %[dest] \n\t" - "paddw %[dest26], %[dest26], %[ph] \n\t" - "psrlw %[dest26], %[dest26], %[shift] \n\t" - - "pmaddhw %[dest37], %[src0], %[mask2] \n\t" - "pmaddhw %[dest], %[src1], %[mask3] \n\t" - "paddw %[dest37], %[dest37], %[dest] \n\t" - "paddw %[dest37], %[dest37], %[ph] \n\t" - "psrlw %[dest37], %[dest37], %[shift] \n\t" - - /* tmp0 = ( 00 04 02 06 ) */ - "packsswh %[tmp0], %[dest04], %[dest26] \n\t" - /* tmp1 = ( 01 05 03 07 ) */ - "packsswh %[tmp1], %[dest15], %[dest37] \n\t" - - /* tmp2 = ( 00 01 04 05 )*/ - "punpcklhw %[tmp2], %[tmp0], %[tmp1] \n\t" - /* tmp3 = ( 02 03 06 07 )*/ - "punpckhhw %[tmp3], %[tmp0], %[tmp1] \n\t" - - /* ( 00 01 02 03 ) */ - "punpcklwd %[dest], %[tmp2], %[tmp3] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - /* ( 04 05 06 07 ) */ - "punpckhwd %[dest], %[tmp2], %[tmp3] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest04] "=&f"(dest04), - [dest15] "=&f"(dest15), [dest26] "=&f"(dest26), [dest37] "=&f"(dest37), - [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), - [tmp3] "=&f"(tmp3), [dest] "=&f"(dest) - : [src1_ptr] "r"(src_ptr), [src2_ptr] "r"(src2_ptr), [dst_ptr] "r"(dst), - [width] 
"r"(dst_width), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [mask3] "f"(mask3), [shift] "f"(shift), [ph] "f"(ph) - : "memory"); -} - -void ScaleRowDown34_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - assert((dst_width % 3 == 0) && (dst_width > 0)); - uint64_t src[2]; - uint64_t tmp[2]; - __asm__ volatile ( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "and %[tmp1], %[src0], %[mask1] \n\t" - "psrlw %[tmp0], %[src0], %[rmov] \n\t" - "psllw %[tmp0], %[tmp0], %[lmov1] \n\t" - "or %[src0], %[tmp0], %[tmp1] \n\t" - "punpckhwd %[tmp0], %[src0], %[src0] \n\t" - "psllw %[tmp1], %[tmp0], %[rmov] \n\t" - "or %[src0], %[src0], %[tmp1] \n\t" - "psrlw %[tmp0], %[tmp0], %[rmov8] \n\t" - "pextrh %[tmp0], %[tmp0], %[zero] \n\t" - "pinsrh_2 %[src0], %[src0], %[tmp0] \n\t" - "pextrh %[tmp0], %[src1], %[zero] \n\t" - "pinsrh_3 %[src0], %[src0], %[tmp0] \n\t" - - "punpckhwd %[tmp0], %[src1], %[src1] \n\t" - "pextrh %[tmp1], %[tmp0], %[zero] \n\t" - "psrlw %[src1], %[src1], %[rmov] \n\t" - "psllw %[tmp1], %[tmp1], %[rmov8] \n\t" - "or %[src1], %[src1], %[tmp1] \n\t" - "and %[tmp0], %[tmp0], %[mask2] \n\t" - "or %[src1], %[src1], %[tmp0] \n\t" - - "gssdlc1 %[src0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[src0], 0x00(%[dst_ptr]) \n\t" - "gsswlc1 %[src1], 0x0b(%[dst_ptr]) \n\t" - "gsswrc1 %[src1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x0c \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t" - "bnez %[width], 1b \n\t" - - : [src0]"=&f"(src[0]), [src1]"=&f"(src[1]), - [tmp0]"=&f"(tmp[0]), [tmp1]"=&f"(tmp[1]) - : [src_ptr]"r"(src_ptr), [dst_ptr]"r"(dst), - [lmov]"f"(0xc), [rmov]"f"(0x18), - [mask1]"f"(0xffff0000ffff), [rmov8]"f"(0x8), - [zero]"f"(0x0), [mask2]"f"(0xff000000), - [width]"r"(dst_width), [lmov1]"f"(0x10) - 
: "memory" - ); -} -// clang-format on - -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/files/tools_libyuv/autoroller/roll_deps.py b/files/tools_libyuv/autoroller/roll_deps.py deleted file mode 100755 index 977c86de..00000000 --- a/files/tools_libyuv/autoroller/roll_deps.py +++ /dev/null @@ -1,509 +0,0 @@ -#!/usr/bin/env vpython3 - -# Copyright 2017 The LibYuv Project Authors. All rights reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - -# This is a modified copy of the script in -# https://webrtc.googlesource.com/src/+/master/tools_webrtc/autoroller/roll_deps.py -# customized for libyuv. - -"""Script to automatically roll dependencies in the libyuv DEPS file.""" - -import argparse -import base64 -import collections -import logging -import os -import re -import subprocess -import sys -import urllib.request - - -# Skip these dependencies (list without solution name prefix). 
-DONT_AUTOROLL_THESE = [ - 'src/third_party/gflags/src', -] - -LIBYUV_URL = 'https://chromium.googlesource.com/libyuv/libyuv' -CHROMIUM_SRC_URL = 'https://chromium.googlesource.com/chromium/src' -CHROMIUM_COMMIT_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s' -CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s' -CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s' - -COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$') -CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([0-9a-z-]+)\'$') -ROLL_BRANCH_NAME = 'roll_chromium_revision' - -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -CHECKOUT_SRC_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.pardir, - os.pardir)) -CHECKOUT_ROOT_DIR = os.path.realpath(os.path.join(CHECKOUT_SRC_DIR, os.pardir)) - -sys.path.append(os.path.join(CHECKOUT_SRC_DIR, 'build')) -import find_depot_tools # pylint: disable=wrong-import-position -find_depot_tools.add_depot_tools_to_path() - -CLANG_UPDATE_SCRIPT_URL_PATH = 'tools/clang/scripts/update.py' -CLANG_UPDATE_SCRIPT_LOCAL_PATH = os.path.join(CHECKOUT_SRC_DIR, 'tools', - 'clang', 'scripts', 'update.py') - -DepsEntry = collections.namedtuple('DepsEntry', 'path url revision') -ChangedDep = collections.namedtuple('ChangedDep', - 'path url current_rev new_rev') - -class RollError(Exception): - pass - - -def VarLookup(local_scope): - return lambda var_name: local_scope['vars'][var_name] - - -def ParseDepsDict(deps_content): - local_scope = {} - global_scope = { - 'Var': VarLookup(local_scope), - 'Str': lambda s: s, - 'deps_os': {}, - } - exec(deps_content, global_scope, local_scope) - return local_scope - - -def ParseLocalDepsFile(filename): - with open(filename, 'rb') as f: - deps_content = f.read().decode('utf-8') - return ParseDepsDict(deps_content) - - -def ParseRemoteCrDepsFile(revision): - deps_content = ReadRemoteCrFile('DEPS', revision) - return ParseDepsDict(deps_content) - - -def ParseCommitPosition(commit_message): - for line in 
reversed(commit_message.splitlines()): - m = COMMIT_POSITION_RE.match(line.strip()) - if m: - return int(m.group(1)) - logging.error('Failed to parse commit position id from:\n%s\n', - commit_message) - sys.exit(-1) - - -def _RunCommand(command, working_dir=None, ignore_exit_code=False, - extra_env=None, input_data=None): - """Runs a command and returns the output from that command. - - If the command fails (exit code != 0), the function will exit the process. - - Returns: - A tuple containing the stdout and stderr outputs as strings. - """ - working_dir = working_dir or CHECKOUT_SRC_DIR - logging.debug('CMD: %s CWD: %s', ' '.join(command), working_dir) - env = os.environ.copy() - if extra_env: - assert all(isinstance(value, str) for value in extra_env.values()) - logging.debug('extra env: %s', extra_env) - env.update(extra_env) - p = subprocess.Popen(command, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env, - cwd=working_dir, - universal_newlines=True) - std_output, err_output = p.communicate(input_data) - p.stdout.close() - p.stderr.close() - if not ignore_exit_code and p.returncode != 0: - logging.error('Command failed: %s\n' - 'stdout:\n%s\n' - 'stderr:\n%s\n', ' '.join(command), std_output, err_output) - sys.exit(p.returncode) - return std_output, err_output - - -def _GetBranches(): - """Returns a tuple of active,branches. - - The 'active' is the name of the currently active branch and 'branches' is a - list of all branches. - """ - lines = _RunCommand(['git', 'branch'])[0].split('\n') - branches = [] - active = '' - for line in lines: - if '*' in line: - # The assumption is that the first char will always be the '*'. - active = line[1:].strip() - branches.append(active) - else: - branch = line.strip() - if branch: - branches.append(branch) - return active, branches - - -def _ReadGitilesContent(url): - # Download and decode BASE64 content until - # https://code.google.com/p/gitiles/issues/detail?id=7 is fixed. 
- base64_content = ReadUrlContent(url + '?format=TEXT') - return base64.b64decode(base64_content[0]).decode('utf-8') - - -def ReadRemoteCrFile(path_below_src, revision): - """Reads a remote Chromium file of a specific revision. Returns a string.""" - return _ReadGitilesContent(CHROMIUM_FILE_TEMPLATE % (revision, - path_below_src)) - - -def ReadRemoteCrCommit(revision): - """Reads a remote Chromium commit message. Returns a string.""" - return _ReadGitilesContent(CHROMIUM_COMMIT_TEMPLATE % revision) - - -def ReadUrlContent(url): - """Connect to a remote host and read the contents. Returns a list of lines.""" - conn = urllib.request.urlopen(url) - try: - return conn.readlines() - except IOError as e: - logging.exception('Error connecting to %s. Error: %s', url, e) - raise - finally: - conn.close() - - -def GetMatchingDepsEntries(depsentry_dict, dir_path): - """Gets all deps entries matching the provided path. - - This list may contain more than one DepsEntry object. - Example: dir_path='src/testing' would give results containing both - 'src/testing/gtest' and 'src/testing/gmock' deps entries for Chromium's DEPS. - Example 2: dir_path='src/build' should return 'src/build' but not - 'src/buildtools'. - - Returns: - A list of DepsEntry objects. 
- """ - result = [] - for path, depsentry in depsentry_dict.items(): - if path == dir_path: - result.append(depsentry) - else: - parts = path.split('/') - if all(part == parts[i] - for i, part in enumerate(dir_path.split('/'))): - result.append(depsentry) - return result - -def BuildDepsentryDict(deps_dict): - """Builds a dict of paths to DepsEntry objects from a raw deps dict.""" - result = {} - - def AddDepsEntries(deps_subdict): - for path, deps_url_spec in deps_subdict.items(): - if isinstance(deps_url_spec, dict): - if deps_url_spec.get('dep_type') == 'cipd': - continue - deps_url = deps_url_spec['url'] - else: - deps_url = deps_url_spec - if not path in result: - url, revision = deps_url.split('@') if deps_url else (None, None) - result[path] = DepsEntry(path, url, revision) - - AddDepsEntries(deps_dict['deps']) - for deps_os in ['win', 'mac', 'linux', 'android', 'ios', 'unix']: - AddDepsEntries(deps_dict.get('deps_os', {}).get(deps_os, {})) - return result - - -def CalculateChangedDeps(libyuv_deps, new_cr_deps): - """ - Calculate changed deps entries based on entries defined in the libyuv DEPS - file: - - If a shared dependency with the Chromium DEPS file: roll it to the same - revision as Chromium (i.e. entry in the new_cr_deps dict) - - If it's a Chromium sub-directory, roll it to the HEAD revision (notice - this means it may be ahead of the chromium_revision, but generally these - should be close). - - If it's another DEPS entry (not shared with Chromium), roll it to HEAD - unless it's configured to be skipped. - - Returns: - A list of ChangedDep objects representing the changed deps. - """ - result = [] - libyuv_entries = BuildDepsentryDict(libyuv_deps) - new_cr_entries = BuildDepsentryDict(new_cr_deps) - for path, libyuv_deps_entry in libyuv_entries.items(): - if path in DONT_AUTOROLL_THESE: - continue - cr_deps_entry = new_cr_entries.get(path) - if cr_deps_entry: - # Use the revision from Chromium's DEPS file. 
- new_rev = cr_deps_entry.revision - assert libyuv_deps_entry.url == cr_deps_entry.url, ( - 'Libyuv DEPS entry %s has a different URL (%s) than Chromium (%s).' % - (path, libyuv_deps_entry.url, cr_deps_entry.url)) - else: - # Use the HEAD of the deps repo. - stdout, _ = _RunCommand(['git', 'ls-remote', libyuv_deps_entry.url, - 'HEAD']) - new_rev = stdout.strip().split('\t')[0] - - # Check if an update is necessary. - if libyuv_deps_entry.revision != new_rev: - logging.debug('Roll dependency %s to %s', path, new_rev) - result.append(ChangedDep(path, libyuv_deps_entry.url, - libyuv_deps_entry.revision, new_rev)) - return sorted(result) - - -def CalculateChangedClang(new_cr_rev): - def GetClangRev(lines): - for line in lines: - match = CLANG_REVISION_RE.match(line) - if match: - return match.group(1) - raise RollError('Could not parse Clang revision from:\n' + '\n'.join(' ' + l for l in lines)) - - with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'r') as f: - current_lines = f.readlines() - current_rev = GetClangRev(current_lines) - - new_clang_update_py = ReadRemoteCrFile(CLANG_UPDATE_SCRIPT_URL_PATH, - new_cr_rev).splitlines() - new_rev = GetClangRev(new_clang_update_py) - return ChangedDep(CLANG_UPDATE_SCRIPT_LOCAL_PATH, None, current_rev, new_rev) - - -def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos, - new_commit_pos, changed_deps_list, clang_change): - current_cr_rev = current_cr_rev[0:10] - new_cr_rev = new_cr_rev[0:10] - rev_interval = '%s..%s' % (current_cr_rev, new_cr_rev) - git_number_interval = '%s:%s' % (current_commit_pos, new_commit_pos) - - commit_msg = ['Roll chromium_revision %s (%s)\n' % (rev_interval, - git_number_interval)] - commit_msg.append('Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval)) - commit_msg.append('Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE % - rev_interval)) - if changed_deps_list: - commit_msg.append('Changed dependencies:') - - for c in changed_deps_list: - commit_msg.append('* %s: %s/+log/%s..%s' % 
(c.path, c.url, - c.current_rev[0:10], - c.new_rev[0:10])) - change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, 'DEPS') - commit_msg.append('DEPS diff: %s\n' % change_url) - else: - commit_msg.append('No dependencies changed.') - - if clang_change.current_rev != clang_change.new_rev: - commit_msg.append('Clang version changed %s:%s' % - (clang_change.current_rev, clang_change.new_rev)) - change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, - CLANG_UPDATE_SCRIPT_URL_PATH) - commit_msg.append('Details: %s\n' % change_url) - else: - commit_msg.append('No update to Clang.\n') - - # TBR needs to be non-empty for Gerrit to process it. - git_author = _RunCommand(['git', 'config', 'user.email'], - working_dir=CHECKOUT_SRC_DIR)[0].strip() - commit_msg.append('TBR=%s' % git_author) - - commit_msg.append('BUG=None') - return '\n'.join(commit_msg) - - -def UpdateDepsFile(deps_filename, old_cr_revision, new_cr_revision, - changed_deps): - """Update the DEPS file with the new revision.""" - - # Update the chromium_revision variable. - with open(deps_filename, 'rb') as deps_file: - deps_content = deps_file.read().decode('utf-8') - deps_content = deps_content.replace(old_cr_revision, new_cr_revision) - with open(deps_filename, 'wb') as deps_file: - deps_file.write(deps_content.encode('utf-8')) - - # Update each individual DEPS entry. - for dep in changed_deps: - local_dep_dir = os.path.join(CHECKOUT_ROOT_DIR, dep.path) - if not os.path.isdir(local_dep_dir): - raise RollError( - 'Cannot find local directory %s. Make sure the .gclient file\n' - 'contains all platforms in the target_os list, i.e.\n' - 'target_os = ["android", "unix", "mac", "ios", "win"];\n' - 'Then run "gclient sync" again.' 
% local_dep_dir) - _RunCommand( - ['gclient', 'setdep', '--revision', '%s@%s' % (dep.path, dep.new_rev)], - working_dir=CHECKOUT_SRC_DIR) - - -def _IsTreeClean(): - stdout, _ = _RunCommand(['git', 'status', '--porcelain']) - if len(stdout) == 0: - return True - - logging.error('Dirty/unversioned files:\n%s', stdout) - return False - - -def _EnsureUpdatedMasterBranch(dry_run): - current_branch = _RunCommand( - ['git', 'rev-parse', '--abbrev-ref', 'HEAD'])[0].splitlines()[0] - if current_branch != 'main': - logging.error('Please checkout the main branch and re-run this script.') - if not dry_run: - sys.exit(-1) - - logging.info('Updating main branch...') - _RunCommand(['git', 'pull']) - - -def _CreateRollBranch(dry_run): - logging.info('Creating roll branch: %s', ROLL_BRANCH_NAME) - if not dry_run: - _RunCommand(['git', 'checkout', '-b', ROLL_BRANCH_NAME]) - - -def _RemovePreviousRollBranch(dry_run): - active_branch, branches = _GetBranches() - if active_branch == ROLL_BRANCH_NAME: - active_branch = 'main' - if ROLL_BRANCH_NAME in branches: - logging.info('Removing previous roll branch (%s)', ROLL_BRANCH_NAME) - if not dry_run: - _RunCommand(['git', 'checkout', active_branch]) - _RunCommand(['git', 'branch', '-D', ROLL_BRANCH_NAME]) - - -def _LocalCommit(commit_msg, dry_run): - logging.info('Committing changes locally.') - if not dry_run: - _RunCommand(['git', 'add', '--update', '.']) - _RunCommand(['git', 'commit', '-m', commit_msg]) - - -def ChooseCQMode(skip_cq, cq_over, current_commit_pos, new_commit_pos): - if skip_cq: - return 0 - if (new_commit_pos - current_commit_pos) < cq_over: - return 1 - return 2 - - -def _UploadCL(commit_queue_mode): - """Upload the committed changes as a changelist to Gerrit. - - commit_queue_mode: - - 2: Submit to commit queue. - - 1: Run trybots but do not submit to CQ. - - 0: Skip CQ, upload only. 
- """ - cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks', '--send-mail'] - if commit_queue_mode >= 2: - logging.info('Sending the CL to the CQ...') - cmd.extend(['-o', 'label=Bot-Commit+1']) - cmd.extend(['-o', 'label=Commit-Queue+2']) - elif commit_queue_mode >= 1: - logging.info('Starting CQ dry run...') - cmd.extend(['-o', 'label=Commit-Queue+1']) - extra_env = { - 'EDITOR': 'true', - 'SKIP_GCE_AUTH_FOR_GIT': '1', - } - stdout, stderr = _RunCommand(cmd, extra_env=extra_env) - logging.debug('Output from "git cl upload":\nstdout:\n%s\n\nstderr:\n%s', - stdout, stderr) - - -def main(): - p = argparse.ArgumentParser() - p.add_argument('--clean', action='store_true', default=False, - help='Removes any previous local roll branch.') - p.add_argument('-r', '--revision', - help=('Chromium Git revision to roll to. Defaults to the ' - 'Chromium HEAD revision if omitted.')) - p.add_argument('--dry-run', action='store_true', default=False, - help=('Calculate changes and modify DEPS, but don\'t create ' - 'any local branch, commit, upload CL or send any ' - 'tryjobs.')) - p.add_argument('-i', '--ignore-unclean-workdir', action='store_true', - default=False, - help=('Ignore if the current branch is not main or if there ' - 'are uncommitted changes (default: %(default)s).')) - grp = p.add_mutually_exclusive_group() - grp.add_argument('--skip-cq', action='store_true', default=False, - help='Skip sending the CL to the CQ (default: %(default)s)') - grp.add_argument('--cq-over', type=int, default=1, - help=('Commit queue dry run if the revision difference ' - 'is below this number (default: %(default)s)')) - p.add_argument('-v', '--verbose', action='store_true', default=False, - help='Be extra verbose in printing of log messages.') - opts = p.parse_args() - - if opts.verbose: - logging.basicConfig(level=logging.DEBUG) - else: - logging.basicConfig(level=logging.INFO) - - if not opts.ignore_unclean_workdir and not _IsTreeClean(): - logging.error('Please clean your local 
checkout first.') - return 1 - - if opts.clean: - _RemovePreviousRollBranch(opts.dry_run) - - if not opts.ignore_unclean_workdir: - _EnsureUpdatedMasterBranch(opts.dry_run) - - new_cr_rev = opts.revision - if not new_cr_rev: - stdout, _ = _RunCommand(['git', 'ls-remote', CHROMIUM_SRC_URL, 'HEAD']) - head_rev = stdout.strip().split('\t')[0] - logging.info('No revision specified. Using HEAD: %s', head_rev) - new_cr_rev = head_rev - - deps_filename = os.path.join(CHECKOUT_SRC_DIR, 'DEPS') - libyuv_deps = ParseLocalDepsFile(deps_filename) - current_cr_rev = libyuv_deps['vars']['chromium_revision'] - - current_commit_pos = ParseCommitPosition(ReadRemoteCrCommit(current_cr_rev)) - new_commit_pos = ParseCommitPosition(ReadRemoteCrCommit(new_cr_rev)) - - new_cr_deps = ParseRemoteCrDepsFile(new_cr_rev) - changed_deps = CalculateChangedDeps(libyuv_deps, new_cr_deps) - clang_change = CalculateChangedClang(new_cr_rev) - commit_msg = GenerateCommitMessage(current_cr_rev, new_cr_rev, - current_commit_pos, new_commit_pos, - changed_deps, clang_change) - logging.debug('Commit message:\n%s', commit_msg) - - _CreateRollBranch(opts.dry_run) - UpdateDepsFile(deps_filename, current_cr_rev, new_cr_rev, changed_deps) - _LocalCommit(commit_msg, opts.dry_run) - commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over, - current_commit_pos, new_commit_pos) - logging.info('Uploading CL...') - if not opts.dry_run: - _UploadCL(commit_queue_mode) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/files/include/libyuv.h b/include/libyuv.h index a06e1233..a06e1233 100644 --- a/files/include/libyuv.h +++ b/include/libyuv.h diff --git a/files/include/libyuv/basic_types.h b/include/libyuv/basic_types.h index 1bea67f2..1bea67f2 100644 --- a/files/include/libyuv/basic_types.h +++ b/include/libyuv/basic_types.h diff --git a/files/include/libyuv/compare.h b/include/libyuv/compare.h index 3353ad71..3353ad71 100644 --- a/files/include/libyuv/compare.h +++ 
b/include/libyuv/compare.h diff --git a/files/include/libyuv/compare_row.h b/include/libyuv/compare_row.h index d8e82d72..8293c919 100644 --- a/files/include/libyuv/compare_row.h +++ b/include/libyuv/compare_row.h @@ -28,7 +28,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/files/include/libyuv/convert.h b/include/libyuv/convert.h index 46d37159..88619a4f 100644 --- a/files/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -151,6 +151,33 @@ int MM21ToI420(const uint8_t* src_y, int width, int height); +// Convert MM21 to YUY2 +LIBYUV_API +int MM21ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); + +// Convert MT2T to P010 +// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will +// be 10 / 8 times the dimensions of the image. Also for this reason, +// src_stride_y and src_stride_uv are given in bytes. +LIBYUV_API +int MT2TToP010(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); + // Convert I422 to NV21. 
LIBYUV_API int I422ToNV21(const uint8_t* src_y, @@ -272,6 +299,23 @@ int I210ToI422(const uint16_t* src_y, int width, int height); +#define H410ToH420 I410ToI420 +LIBYUV_API +int I410ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + #define H410ToH444 I410ToI444 LIBYUV_API int I410ToI444(const uint16_t* src_y, @@ -323,6 +367,23 @@ int I212ToI422(const uint16_t* src_y, int width, int height); +#define H212ToH420 I212ToI420 +LIBYUV_API +int I212ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + #define H412ToH444 I412ToI444 LIBYUV_API int I412ToI444(const uint16_t* src_y, @@ -340,6 +401,23 @@ int I412ToI444(const uint16_t* src_y, int width, int height); +#define H412ToH420 I412ToI420 +LIBYUV_API +int I412ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + #define I412ToI012 I410ToI010 #define H410ToH010 I410ToI010 #define H412ToH012 I410ToI010 @@ -560,6 +638,36 @@ int NV16ToNV24(const uint8_t* src_y, int width, int height); +// Convert P010 to I010. +LIBYUV_API +int P010ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert P012 to I012. 
+LIBYUV_API +int P012ToI012(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + // Convert P010 to P410. LIBYUV_API int P010ToP410(const uint16_t* src_y, @@ -677,6 +785,21 @@ int ARGBToI420(const uint8_t* src_argb, int width, int height); +// Convert ARGB to I420 with Alpha +LIBYUV_API +int ARGBToI420Alpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height); + // BGRA little endian (argb in memory) to I420. LIBYUV_API int BGRAToI420(const uint8_t* src_bgra, diff --git a/files/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index f66d20ce..35eeac9b 100644 --- a/files/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -67,6 +67,8 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) #define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \ I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I012ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \ + I012ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) #define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) #define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ @@ -404,6 +406,32 @@ int U444ToABGR(const uint8_t* src_y, int width, int height); +// Convert I444 to RGB24. +LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert I444 to RAW. 
+LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, @@ -1312,6 +1340,32 @@ int J420ToRAW(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + LIBYUV_API int I420ToRGB565(const uint8_t* src_y, int src_stride_y, @@ -1495,6 +1549,20 @@ int I444ToARGBMatrix(const uint8_t* src_y, int width, int height); +// Convert I444 to RGB24 with matrix. +LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert 10 bit 420 YUV to ARGB with matrix. LIBYUV_API int I010ToAR30Matrix(const uint16_t* src_y, @@ -1893,6 +1961,20 @@ int I420ToRGB24Matrix(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB24 with matrix. +LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert I420 to RGB565 with specified color matrix. 
LIBYUV_API int I420ToRGB565Matrix(const uint8_t* src_y, @@ -1907,6 +1989,20 @@ int I420ToRGB565Matrix(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB565 with specified color matrix. +LIBYUV_API +int I422ToRGB565Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert I420 to AR30 with matrix. LIBYUV_API int I420ToAR30Matrix(const uint8_t* src_y, @@ -1961,6 +2057,36 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, int height, enum FilterMode filter); +// Convert I422 to RGB24 with matrix and UV filter mode. +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I420 to RGB24 with matrix and UV filter mode. +LIBYUV_API +int I420ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + // Convert I010 to AR30 with matrix and UV filter mode. 
LIBYUV_API int I010ToAR30MatrixFilter(const uint16_t* src_y, diff --git a/files/include/libyuv/convert_from.h b/include/libyuv/convert_from.h index 32f42a63..32f42a63 100644 --- a/files/include/libyuv/convert_from.h +++ b/include/libyuv/convert_from.h diff --git a/files/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index 2a488838..ff2a581a 100644 --- a/files/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -209,10 +209,10 @@ int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height); @@ -222,10 +222,10 @@ int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height); @@ -238,6 +238,41 @@ int ARGBToJ400(const uint8_t* src_argb, int width, int height); +// Convert ABGR to J420. (JPeg full range I420). +LIBYUV_API +int ABGRToJ420(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height); + +// Convert ABGR to J422. +LIBYUV_API +int ABGRToJ422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height); + +// Convert ABGR to J400. (JPeg full range). +LIBYUV_API +int ABGRToJ400(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); + // Convert RGBA to J400. (JPeg full range). 
LIBYUV_API int RGBAToJ400(const uint8_t* src_rgba, diff --git a/files/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index fb90c6c7..5a81e7c9 100644 --- a/files/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -31,30 +31,36 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; // unused at this time. +static const int kCpuHasSSE42 = 0x100; static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasF16C = 0x2000; -static const int kCpuHasGFNI = 0x4000; -static const int kCpuHasAVX512BW = 0x8000; -static const int kCpuHasAVX512VL = 0x10000; -static const int kCpuHasAVX512VNNI = 0x20000; -static const int kCpuHasAVX512VBMI = 0x40000; -static const int kCpuHasAVX512VBMI2 = 0x80000; -static const int kCpuHasAVX512VBITALG = 0x100000; -static const int kCpuHasAVX512VPOPCNTDQ = 0x200000; +static const int kCpuHasAVX512BW = 0x4000; +static const int kCpuHasAVX512VL = 0x8000; +static const int kCpuHasAVX512VNNI = 0x10000; +static const int kCpuHasAVX512VBMI = 0x20000; +static const int kCpuHasAVX512VBMI2 = 0x40000; +static const int kCpuHasAVX512VBITALG = 0x80000; +static const int kCpuHasAVX10 = 0x100000; +static const int kCpuHasAVXVNNI = 0x200000; +static const int kCpuHasAVXVNNIINT8 = 0x400000; // These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x400000; -static const int kCpuHasMSA = 0x800000; +static const int kCpuHasMIPS = 0x800000; +static const int kCpuHasMSA = 0x1000000; // These flags are only valid on LOONGARCH processors. static const int kCpuHasLOONGARCH = 0x2000000; static const int kCpuHasLSX = 0x4000000; static const int kCpuHasLASX = 0x8000000; +// These flags are only valid on RISCV processors. 
+static const int kCpuHasRISCV = 0x10000000; +static const int kCpuHasRVV = 0x20000000; +static const int kCpuHasRVVZVFH = 0x40000000; + // Optional init function. TestCpuFlag does an auto-init. // Returns cpu_info flags. LIBYUV_API @@ -78,6 +84,8 @@ LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); LIBYUV_API int MipsCpuCaps(const char* cpuinfo_name); +LIBYUV_API +int RiscvCpuCaps(const char* cpuinfo_name); // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. diff --git a/files/include/libyuv/loongson_intrinsics.h b/include/libyuv/loongson_intrinsics.h index 1d613def..1d613def 100644 --- a/files/include/libyuv/loongson_intrinsics.h +++ b/include/libyuv/loongson_intrinsics.h diff --git a/files/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h index b9a44fcc..b9a44fcc 100644 --- a/files/include/libyuv/macros_msa.h +++ b/include/libyuv/macros_msa.h diff --git a/files/include/libyuv/mjpeg_decoder.h b/include/libyuv/mjpeg_decoder.h index 275f8d4c..275f8d4c 100644 --- a/files/include/libyuv/mjpeg_decoder.h +++ b/include/libyuv/mjpeg_decoder.h diff --git a/files/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 1ef2256b..f9344721 100644 --- a/files/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -30,7 +30,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -85,13 +88,23 @@ void SetPlane(uint8_t* dst_y, // Convert a plane of tiles of 16 x H to linear. 
LIBYUV_API -void DetilePlane(const uint8_t* src_y, - int src_stride_y, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height, - int tile_height); +int DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height); + +// Convert a plane of 16 bit tiles of 16 x H to linear. +LIBYUV_API +int DetilePlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height); // Convert a UV plane of tiles of 16 x H into linear U and V planes. LIBYUV_API @@ -105,6 +118,18 @@ void DetileSplitUVPlane(const uint8_t* src_uv, int height, int tile_height); +// Convert a Y and UV plane of tiles into interlaced YUY2. +LIBYUV_API +void DetileToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height, + int tile_height); + // Split interleaved UV plane into separate U and V planes. LIBYUV_API void SplitUVPlane(const uint8_t* src_uv, @@ -370,7 +395,26 @@ int I210Copy(const uint16_t* src_y, int width, int height); +// Copy I410 to I410. +#define I410ToI410 I410Copy +LIBYUV_API +int I410Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + // Copy NV12. Supports inverting. +LIBYUV_API int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -383,6 +427,7 @@ int NV12Copy(const uint8_t* src_y, int height); // Copy NV21. Supports inverting. 
+LIBYUV_API int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, @@ -785,15 +830,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, int width, int height); -typedef void (*ARGBBlendRow)(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); - -// Get function to Alpha Blend ARGB pixels and store to destination. -LIBYUV_API -ARGBBlendRow GetARGBBlend(); - // Alpha Blend ARGB images and store to destination. // Source is pre-multiplied by alpha using ARGBAttenuate. // Alpha of destination is set to 255. diff --git a/files/include/libyuv/rotate.h b/include/libyuv/rotate.h index 684ed5e6..37460c4a 100644 --- a/files/include/libyuv/rotate.h +++ b/include/libyuv/rotate.h @@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y, int height, enum RotationMode mode); +// Rotate I010 frame. +LIBYUV_API +int I010Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Rotate I210 frame. +LIBYUV_API +int I210Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Rotate I410 frame. +LIBYUV_API +int I410Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + // Rotate NV12 input and store in I420. 
LIBYUV_API int NV12ToI420Rotate(const uint8_t* src_y, @@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src, int width, int height); +// Rotate a plane by 0, 90, 180, or 270. +LIBYUV_API +int RotatePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height, + enum RotationMode mode); + // Rotations for when U and V are interleaved. // These functions take one UV input pointer and // split the data into two buffers while diff --git a/files/include/libyuv/rotate_argb.h b/include/libyuv/rotate_argb.h index 20432949..20432949 100644 --- a/files/include/libyuv/rotate_argb.h +++ b/include/libyuv/rotate_argb.h diff --git a/files/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index aa8528a9..3e6a2fef 100644 --- a/files/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -28,7 +28,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -42,6 +45,8 @@ extern "C" { // The following are available for GCC 32 or 64 bit: #if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #define HAS_TRANSPOSEWX8_SSSE3 +#define HAS_TRANSPOSE4X4_32_SSE2 +#define HAS_TRANSPOSE4X4_32_AVX2 #endif // The following are available for 64 bit GCC: @@ -54,6 +59,7 @@ extern "C" { (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON +#define HAS_TRANSPOSE4X4_32_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) @@ -215,6 +221,48 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src, uint8_t* dst_b, int dst_stride_b, int width); +void TransposeWxH_16_C(const uint16_t* src, + int 
src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height); + +void TransposeWx8_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width); +void TransposeWx1_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width); + +// Transpose 32 bit values (ARGB) +void Transpose4x4_32_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); + +void Transpose4x4_32_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); + +void Transpose4x4_32_AVX2(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); + +void Transpose4x4_32_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); #ifdef __cplusplus } // extern "C" diff --git a/files/include/libyuv/row.h b/include/libyuv/row.h index 1a1cf4b6..46685a50 100644 --- a/files/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -11,7 +11,8 @@ #ifndef INCLUDE_LIBYUV_ROW_H_ #define INCLUDE_LIBYUV_ROW_H_ -#include <stdlib.h> // For malloc. +#include <stddef.h> // For NULL +#include <stdlib.h> // For malloc #include "libyuv/basic_types.h" @@ -30,7 +31,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. 
http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -75,9 +79,6 @@ extern "C" { (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) // Conversions: #define HAS_ABGRTOYROW_SSSE3 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ABGRTOUVROW_SSSE3 -#endif #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBEXTRACTALPHAROW_SSE2 @@ -92,12 +93,6 @@ extern "C" { #define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ARGBTOUV444ROW_SSSE3 -#define HAS_ARGBTOUVJROW_SSSE3 -#define HAS_ARGBTOUVROW_SSSE3 -#define HAS_BGRATOUVROW_SSSE3 -#endif #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_H422TOARGBROW_SSSE3 @@ -111,6 +106,7 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 +#define HAS_I444TORGB24ROW_SSSE3 #define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 @@ -124,16 +120,13 @@ extern "C" { #define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 +#define HAS_RAWTOYJROW_SSSE3 #define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 +#define HAS_RGB24TOYJROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 #define HAS_RGBATOYROW_SSSE3 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_RGB24TOYJROW_SSSE3 -#define HAS_RAWTOYJROW_SSSE3 -#define HAS_RGBATOUVROW_SSSE3 -#endif #define HAS_SETROW_ERMS #define HAS_SETROW_X86 #define HAS_SPLITUVROW_SSE2 @@ -145,13 +138,18 @@ extern "C" { #define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOYROW_SSE2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVROW_SSSE3 +#define 
HAS_ARGBTOUV444ROW_SSSE3 +#define HAS_ARGBTOUVJROW_SSSE3 +#define HAS_ARGBTOUVROW_SSSE3 +#define HAS_BGRATOUVROW_SSSE3 +#define HAS_RGBATOUVROW_SSSE3 +#endif // Effects: #define HAS_ARGBADDROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ARGBATTENUATEROW_SSSE3 -#endif #define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORTABLEROW_X86 @@ -166,7 +164,6 @@ extern "C" { #define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSHADEROW_SSE2 #define HAS_ARGBSUBTRACTROW_SSE2 -#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 @@ -201,17 +198,10 @@ extern "C" { #define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBTORGB565DITHERROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 -#define HAS_RAWTOYJROW_AVX2 -#define HAS_RGB24TOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ARGBTOUVJROW_AVX2 -#define HAS_ARGBTOUVROW_AVX2 -#endif #define HAS_COPYROW_AVX #define HAS_H422TOARGBROW_AVX2 #define HAS_HALFFLOATROW_AVX2 -// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -219,6 +209,7 @@ extern "C" { #define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_I444TOARGBROW_AVX2 +#define HAS_I444TORGB24ROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 @@ -228,6 +219,8 @@ extern "C" { #define HAS_NV12TORGB565ROW_AVX2 #define HAS_NV21TOARGBROW_AVX2 #define HAS_NV21TORGB24ROW_AVX2 +#define HAS_RAWTOYJROW_AVX2 +#define HAS_RGB24TOYJROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_UYVYTOARGBROW_AVX2 #define HAS_UYVYTOUV422ROW_AVX2 @@ -237,15 +230,16 @@ extern "C" { #define HAS_YUY2TOUV422ROW_AVX2 #define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOYROW_AVX2 +// #define HAS_HALFFLOATROW_F16C // Enable to test half float cast +#if 
!defined(LIBYUV_BIT_EXACT) +#define HAS_ARGBTOUVJROW_AVX2 +#define HAS_ARGBTOUVROW_AVX2 +#endif // Effects: #define HAS_ARGBADDROW_AVX2 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ARGBATTENUATEROW_AVX2 -#endif #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2 -#define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2 #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ @@ -282,28 +276,33 @@ extern "C" { // The following are available for gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_AB64TOARGBROW_SSSE3 #define HAS_ABGRTOAR30ROW_SSSE3 +#define HAS_ABGRTOYJROW_SSSE3 +#define HAS_AR64TOARGBROW_SSSE3 +#define HAS_ARGBATTENUATEROW_SSSE3 +#define HAS_ARGBTOAB64ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_ARGBTOAR64ROW_SSSE3 -#define HAS_ARGBTOAB64ROW_SSSE3 -#define HAS_AR64TOARGBROW_SSSE3 -#define HAS_AB64TOARGBROW_SSSE3 +#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 +#define HAS_DETILEROW_16_SSE2 #define HAS_DETILEROW_SSE2 #define HAS_DETILESPLITUVROW_SSSE3 +#define HAS_DETILETOYUY2_SSE2 #define HAS_HALFMERGEUVROW_SSSE3 #define HAS_I210TOAR30ROW_SSSE3 #define HAS_I210TOARGBROW_SSSE3 #define HAS_I212TOAR30ROW_SSSE3 #define HAS_I212TOARGBROW_SSSE3 #define HAS_I400TOARGBROW_SSE2 -#define HAS_I422TOAR30ROW_SSSE3 #define HAS_I410TOAR30ROW_SSSE3 #define HAS_I410TOARGBROW_SSSE3 +#define HAS_I422TOAR30ROW_SSSE3 #define HAS_MERGEARGBROW_SSE2 -#define HAS_MERGEXRGBROW_SSE2 #define HAS_MERGERGBROW_SSSE3 +#define HAS_MERGEXRGBROW_SSE2 #define HAS_MIRRORUVROW_SSSE3 #define HAS_NV21TOYUV24ROW_SSSE3 #define HAS_P210TOAR30ROW_SSSE3 @@ -312,15 +311,17 @@ extern "C" { #define HAS_P410TOARGBROW_SSSE3 #define HAS_RAWTORGBAROW_SSSE3 #define HAS_RGB24MIRRORROW_SSSE3 -#if !defined(LIBYUV_BIT_EXACT) #define HAS_RGBATOYJROW_SSSE3 -#endif #define HAS_SPLITARGBROW_SSE2 #define 
HAS_SPLITARGBROW_SSSE3 +#define HAS_SPLITRGBROW_SSSE3 #define HAS_SPLITXRGBROW_SSE2 #define HAS_SPLITXRGBROW_SSSE3 -#define HAS_SPLITRGBROW_SSSE3 #define HAS_SWAPUVROW_SSSE3 +#define HAS_YUY2TONVUVROW_SSE2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVJROW_SSSE3 +#endif #if defined(__x86_64__) || !defined(__pic__) // TODO(fbarchard): fix build error on android_full_debug=1 @@ -335,31 +336,23 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || defined(__i386__)) && \ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#define HAS_AB64TOARGBROW_AVX2 #define HAS_ABGRTOAR30ROW_AVX2 -#if !defined(LIBYUV_BIT_EXACT) -#define HAS_ABGRTOUVROW_AVX2 +#define HAS_ABGRTOYJROW_AVX2 #define HAS_ABGRTOYROW_AVX2 -#endif +#define HAS_AR64TOARGBROW_AVX2 +#define HAS_ARGBATTENUATEROW_AVX2 +#define HAS_ARGBTOAB64ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2 +#define HAS_ARGBTOAR64ROW_AVX2 #define HAS_ARGBTORAWROW_AVX2 #define HAS_ARGBTORGB24ROW_AVX2 -#define HAS_ARGBTOAR64ROW_AVX2 -#define HAS_ARGBTOAB64ROW_AVX2 -#define HAS_AR64TOARGBROW_AVX2 -#define HAS_AB64TOARGBROW_AVX2 +#define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 -#define HAS_INTERPOLATEROW_16TO8_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 +#define HAS_DETILEROW_16_AVX #define HAS_DIVIDEROW_16_AVX2 #define HAS_HALFMERGEUVROW_AVX2 -#define HAS_MERGEAR64ROW_AVX2 -#define HAS_MERGEARGB16TO8ROW_AVX2 -#define HAS_MERGEARGBROW_AVX2 -#define HAS_MERGEXR30ROW_AVX2 -#define HAS_MERGEXR64ROW_AVX2 -#define HAS_MERGEXRGB16TO8ROW_AVX2 -#define HAS_MERGEXRGBROW_AVX2 -#define HAS_NV21TOYUV24ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2 #define HAS_I210TOARGBROW_AVX2 #define HAS_I212TOAR30ROW_AVX2 @@ -367,23 +360,35 @@ extern "C" { #define HAS_I400TOARGBROW_AVX2 #define HAS_I410TOAR30ROW_AVX2 #define HAS_I410TOARGBROW_AVX2 -#define HAS_P210TOAR30ROW_AVX2 -#define HAS_P210TOARGBROW_AVX2 -#define HAS_P410TOAR30ROW_AVX2 -#define HAS_P410TOARGBROW_AVX2 #define HAS_I422TOAR30ROW_AVX2 #define 
HAS_I422TOUYVYROW_AVX2 #define HAS_I422TOYUY2ROW_AVX2 +#define HAS_INTERPOLATEROW_16TO8_AVX2 +#define HAS_MERGEAR64ROW_AVX2 +#define HAS_MERGEARGB16TO8ROW_AVX2 +#define HAS_MERGEARGBROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 +#define HAS_MERGEXR30ROW_AVX2 +#define HAS_MERGEXR64ROW_AVX2 +#define HAS_MERGEXRGB16TO8ROW_AVX2 +#define HAS_MERGEXRGBROW_AVX2 #define HAS_MIRRORUVROW_AVX2 #define HAS_MULTIPLYROW_16_AVX2 -#if !defined(LIBYUV_BIT_EXACT) +#define HAS_NV21TOYUV24ROW_AVX2 +#define HAS_P210TOAR30ROW_AVX2 +#define HAS_P210TOARGBROW_AVX2 +#define HAS_P410TOAR30ROW_AVX2 +#define HAS_P410TOARGBROW_AVX2 #define HAS_RGBATOYJROW_AVX2 -#endif #define HAS_SPLITARGBROW_AVX2 -#define HAS_SPLITXRGBROW_AVX2 #define HAS_SPLITUVROW_16_AVX2 +#define HAS_SPLITXRGBROW_AVX2 #define HAS_SWAPUVROW_AVX2 +#define HAS_YUY2TONVUVROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVJROW_AVX2 +#define HAS_ABGRTOUVROW_AVX2 +#endif #if defined(__x86_64__) || !defined(__pic__) // TODO(fbarchard): fix build error on android_full_debug=1 @@ -397,8 +402,9 @@ extern "C" { // TODO(fbarchard): Port to GCC and Visual C // TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. 
Issue libyuv:789 #if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || defined(__i386__)) && (defined(CLANG_HAS_AVX512)) + (defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512) #define HAS_ARGBTORGB24ROW_AVX512VBMI +#define HAS_MERGEUVROW_AVX512BW #endif // The following are available for AVX512 clang x64 platforms: @@ -412,7 +418,9 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) #define HAS_AB64TOARGBROW_NEON +#define HAS_ABGRTOUVJROW_NEON #define HAS_ABGRTOUVROW_NEON +#define HAS_ABGRTOYJROW_NEON #define HAS_ABGRTOYROW_NEON #define HAS_AR64TOARGBROW_NEON #define HAS_ARGB1555TOARGBROW_NEON @@ -444,8 +452,11 @@ extern "C" { #define HAS_BYTETOFLOATROW_NEON #define HAS_CONVERT16TO8ROW_NEON #define HAS_COPYROW_NEON +#define HAS_DETILEROW_16_NEON #define HAS_DETILEROW_NEON #define HAS_DETILESPLITUVROW_NEON +#define HAS_DETILETOYUY2_NEON +#define HAS_UNPACKMT2T_NEON #define HAS_DIVIDEROW_16_NEON #define HAS_HALFFLOATROW_NEON #define HAS_HALFMERGEUVROW_NEON @@ -461,6 +472,7 @@ extern "C" { #define HAS_I422TOYUY2ROW_NEON #define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_I444TORGB24ROW_NEON #define HAS_INTERPOLATEROW_16_NEON #define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON @@ -513,6 +525,7 @@ extern "C" { #define HAS_UYVYTOUVROW_NEON #define HAS_UYVYTOYROW_NEON #define HAS_YUY2TOARGBROW_NEON +#define HAS_YUY2TONVUVROW_NEON #define HAS_YUY2TOUV422ROW_NEON #define HAS_YUY2TOUVROW_NEON #define HAS_YUY2TOYROW_NEON @@ -524,13 +537,13 @@ extern "C" { #define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBGRAYROW_NEON #define HAS_ARGBMIRRORROW_NEON -#define HAS_RGB24MIRRORROW_NEON #define HAS_ARGBMULTIPLYROW_NEON #define HAS_ARGBQUANTIZEROW_NEON #define HAS_ARGBSEPIAROW_NEON #define HAS_ARGBSHADEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON +#define HAS_RGB24MIRRORROW_NEON #define HAS_SOBELROW_NEON #define 
HAS_SOBELTOPLANEROW_NEON #define HAS_SOBELXROW_NEON @@ -540,12 +553,13 @@ extern "C" { // The following are available on AArch64 platforms: #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#define HAS_GAUSSCOL_F32_NEON +#define HAS_GAUSSROW_F32_NEON #define HAS_INTERPOLATEROW_16TO8_NEON #define HAS_SCALESUMSAMPLES_NEON -#define HAS_GAUSSROW_F32_NEON -#define HAS_GAUSSCOL_F32_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_ABGRTOUVJROW_MSA #define HAS_ABGRTOUVROW_MSA #define HAS_ABGRTOYROW_MSA #define HAS_ARGB1555TOARGBROW_MSA @@ -581,27 +595,25 @@ extern "C" { #define HAS_BGRATOYROW_MSA #define HAS_HALFFLOATROW_MSA #define HAS_I400TOARGBROW_MSA -#define HAS_I422TOUYVYROW_MSA -#define HAS_I422TOYUY2ROW_MSA -#define HAS_I422TOARGBROW_MSA -#define HAS_I422TORGBAROW_MSA #define HAS_I422ALPHATOARGBROW_MSA +#define HAS_I422TOARGB1555ROW_MSA +#define HAS_I422TOARGB4444ROW_MSA +#define HAS_I422TOARGBROW_MSA #define HAS_I422TORGB24ROW_MSA #define HAS_I422TORGB565ROW_MSA -#define HAS_I422TOARGB4444ROW_MSA -#define HAS_I422TOARGB1555ROW_MSA -#define HAS_NV12TOARGBROW_MSA -#define HAS_NV12TORGB565ROW_MSA -#define HAS_NV21TOARGBROW_MSA -#define HAS_YUY2TOARGBROW_MSA -#define HAS_UYVYTOARGBROW_MSA +#define HAS_I422TORGBAROW_MSA +#define HAS_I422TOUYVYROW_MSA +#define HAS_I422TOYUY2ROW_MSA #define HAS_I444TOARGBROW_MSA #define HAS_INTERPOLATEROW_MSA #define HAS_J400TOARGBROW_MSA #define HAS_MERGEUVROW_MSA #define HAS_MIRRORROW_MSA -#define HAS_MIRRORUVROW_MSA #define HAS_MIRRORSPLITUVROW_MSA +#define HAS_MIRRORUVROW_MSA +#define HAS_NV12TOARGBROW_MSA +#define HAS_NV12TORGB565ROW_MSA +#define HAS_NV21TOARGBROW_MSA #define HAS_RAWTOARGBROW_MSA #define HAS_RAWTORGB24ROW_MSA #define HAS_RAWTOUVROW_MSA @@ -621,113 +633,226 @@ extern "C" { #define HAS_SOBELXYROW_MSA #define HAS_SOBELYROW_MSA #define HAS_SPLITUVROW_MSA +#define HAS_UYVYTOARGBROW_MSA #define HAS_UYVYTOUVROW_MSA #define HAS_UYVYTOYROW_MSA +#define HAS_YUY2TOARGBROW_MSA #define 
HAS_YUY2TOUV422ROW_MSA #define HAS_YUY2TOUVROW_MSA #define HAS_YUY2TOYROW_MSA #endif #if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) -#define HAS_ARGB4444TOARGBROW_LSX +#define HAS_ABGRTOUVROW_LSX +#define HAS_ABGRTOYROW_LSX #define HAS_ARGB1555TOARGBROW_LSX -#define HAS_RGB565TOARGBROW_LSX -#define HAS_RGB24TOARGBROW_LSX -#define HAS_RAWTOARGBROW_LSX -#define HAS_ARGB1555TOYROW_LSX #define HAS_ARGB1555TOUVROW_LSX -#define HAS_RGB565TOYROW_LSX -#define HAS_RGB565TOUVROW_LSX -#define HAS_RGB24TOYROW_LSX -#define HAS_RGB24TOUVROW_LSX -#define HAS_RAWTOYROW_LSX -#define HAS_RAWTOUVROW_LSX +#define HAS_ARGB1555TOYROW_LSX +#define HAS_ARGB4444TOARGBROW_LSX +#define HAS_ARGBADDROW_LSX +#define HAS_ARGBATTENUATEROW_LSX +#define HAS_ARGBBLENDROW_LSX +#define HAS_ARGBCOLORMATRIXROW_LSX +#define HAS_ARGBEXTRACTALPHAROW_LSX +#define HAS_ARGBGRAYROW_LSX +#define HAS_ARGBSEPIAROW_LSX +#define HAS_ARGBSHADEROW_LSX +#define HAS_ARGBSHUFFLEROW_LSX +#define HAS_ARGBSUBTRACTROW_LSX +#define HAS_ARGBQUANTIZEROW_LSX +#define HAS_ARGBSETROW_LSX +#define HAS_ARGBTOARGB1555ROW_LSX +#define HAS_ARGBTOARGB4444ROW_LSX +#define HAS_ARGBTORAWROW_LSX +#define HAS_ARGBTORGB24ROW_LSX +#define HAS_ARGBTORGB565ROW_LSX +#define HAS_ARGBTORGB565DITHERROW_LSX +#define HAS_ARGBTOUVJROW_LSX +#define HAS_ARGBTOUV444ROW_LSX +#define HAS_ARGBTOUVROW_LSX +#define HAS_ARGBTOYJROW_LSX +#define HAS_ARGBMIRRORROW_LSX +#define HAS_ARGBMULTIPLYROW_LSX +#define HAS_BGRATOUVROW_LSX +#define HAS_BGRATOYROW_LSX +#define HAS_I400TOARGBROW_LSX +#define HAS_I444TOARGBROW_LSX +#define HAS_INTERPOLATEROW_LSX +#define HAS_I422ALPHATOARGBROW_LSX +#define HAS_I422TOARGB1555ROW_LSX +#define HAS_I422TOARGB4444ROW_LSX +#define HAS_I422TORGB24ROW_LSX +#define HAS_I422TORGB565ROW_LSX +#define HAS_I422TORGBAROW_LSX +#define HAS_I422TOUYVYROW_LSX +#define HAS_I422TOYUY2ROW_LSX +#define HAS_J400TOARGBROW_LSX +#define HAS_MERGEUVROW_LSX +#define HAS_MIRRORROW_LSX +#define HAS_MIRRORUVROW_LSX +#define 
HAS_MIRRORSPLITUVROW_LSX #define HAS_NV12TOARGBROW_LSX #define HAS_NV12TORGB565ROW_LSX #define HAS_NV21TOARGBROW_LSX +#define HAS_RAWTOARGBROW_LSX +#define HAS_RAWTORGB24ROW_LSX +#define HAS_RAWTOUVROW_LSX +#define HAS_RAWTOYROW_LSX +#define HAS_RGB24TOARGBROW_LSX +#define HAS_RGB24TOUVROW_LSX +#define HAS_RGB24TOYROW_LSX +#define HAS_RGB565TOARGBROW_LSX +#define HAS_RGB565TOUVROW_LSX +#define HAS_RGB565TOYROW_LSX +#define HAS_RGBATOUVROW_LSX +#define HAS_RGBATOYROW_LSX +#define HAS_SETROW_LSX #define HAS_SOBELROW_LSX #define HAS_SOBELTOPLANEROW_LSX #define HAS_SOBELXYROW_LSX -#define HAS_ARGBTOYJROW_LSX -#define HAS_BGRATOYROW_LSX -#define HAS_BGRATOUVROW_LSX -#define HAS_ABGRTOYROW_LSX -#define HAS_ABGRTOUVROW_LSX -#define HAS_RGBATOYROW_LSX -#define HAS_RGBATOUVROW_LSX -#define HAS_ARGBTOUVJROW_LSX -#define HAS_I444TOARGBROW_LSX -#define HAS_I400TOARGBROW_LSX -#define HAS_J400TOARGBROW_LSX -#define HAS_YUY2TOARGBROW_LSX -#define HAS_UYVYTOARGBROW_LSX -#define HAS_INTERPOLATEROW_LSX -#define HAS_ARGBSETROW_LSX -#define HAS_RAWTORGB24ROW_LSX -#define HAS_MERGEUVROW_LSX -#define HAS_ARGBEXTRACTALPHAROW_LSX -#define HAS_ARGBBLENDROW_LSX -#define HAS_ARGBQUANTIZEROW_LSX -#define HAS_ARGBCOLORMATRIXROW_LSX #define HAS_SPLITUVROW_LSX -#define HAS_SETROW_LSX -#define HAS_MIRRORSPLITUVROW_LSX +#define HAS_UYVYTOARGBROW_LSX +#define HAS_UYVYTOUV422ROW_LSX +#define HAS_UYVYTOUVROW_LSX +#define HAS_UYVYTOYROW_LSX +#define HAS_YUY2TOARGBROW_LSX +#define HAS_YUY2TOUVROW_LSX +#define HAS_YUY2TOUV422ROW_LSX +#define HAS_YUY2TOYROW_LSX +#define HAS_ARGBTOYROW_LSX +#define HAS_ABGRTOYJROW_LSX +#define HAS_RGBATOYJROW_LSX +#define HAS_RGB24TOYJROW_LSX +#define HAS_RAWTOYJROW_LSX +#endif + +#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) +#define HAS_I422TOARGBROW_LSX #endif #if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx) +#define HAS_ARGB1555TOARGBROW_LASX +#define HAS_ARGB1555TOUVROW_LASX +#define HAS_ARGB1555TOYROW_LASX +#define 
HAS_ARGB4444TOARGBROW_LASX +#define HAS_ARGBADDROW_LASX +#define HAS_ARGBATTENUATEROW_LASX +#define HAS_ARGBGRAYROW_LASX +#define HAS_ARGBMIRRORROW_LASX +#define HAS_ARGBMULTIPLYROW_LASX +#define HAS_ARGBSEPIAROW_LASX +#define HAS_ARGBSHADEROW_LASX +#define HAS_ARGBSHUFFLEROW_LASX +#define HAS_ARGBSUBTRACTROW_LASX +#define HAS_ARGBTOARGB1555ROW_LASX +#define HAS_ARGBTOARGB4444ROW_LASX +#define HAS_ARGBTORAWROW_LASX +#define HAS_ARGBTORGB24ROW_LASX +#define HAS_ARGBTORGB565DITHERROW_LASX +#define HAS_ARGBTORGB565ROW_LASX +#define HAS_ARGBTOUV444ROW_LASX +#define HAS_ARGBTOUVJROW_LASX +#define HAS_ARGBTOUVROW_LASX +#define HAS_ARGBTOYJROW_LASX +#define HAS_ARGBTOYROW_LASX +#define HAS_ABGRTOYJROW_LASX +#define HAS_ABGRTOYROW_LASX +#define HAS_I422ALPHATOARGBROW_LASX +#define HAS_I422TOARGB1555ROW_LASX +#define HAS_I422TOARGB4444ROW_LASX #define HAS_I422TOARGBROW_LASX +#define HAS_I422TORGB24ROW_LASX +#define HAS_I422TORGB565ROW_LASX #define HAS_I422TORGBAROW_LASX -#define HAS_I422ALPHATOARGBROW_LASX -#define HAS_I422TOYUY2ROW_LASX #define HAS_I422TOUYVYROW_LASX +#define HAS_I422TOYUY2ROW_LASX #define HAS_MIRRORROW_LASX #define HAS_MIRRORUVROW_LASX -#define HAS_ARGBMIRRORROW_LASX -#define HAS_I422TORGB24ROW_LASX -#define HAS_I422TORGB565ROW_LASX -#define HAS_I422TOARGB4444ROW_LASX -#define HAS_I422TOARGB1555ROW_LASX -#define HAS_YUY2TOUVROW_LASX -#define HAS_YUY2TOYROW_LASX -#define HAS_YUY2TOUV422ROW_LASX -#define HAS_UYVYTOYROW_LASX -#define HAS_UYVYTOUVROW_LASX -#define HAS_UYVYTOUV422ROW_LASX -#define HAS_ARGBTOYROW_LASX -#define HAS_ARGBTOUVROW_LASX -#define HAS_ARGBTORGB24ROW_LASX -#define HAS_ARGBTORAWROW_LASX -#define HAS_ARGBTORGB565ROW_LASX -#define HAS_ARGBTOARGB1555ROW_LASX -#define HAS_ARGBTOARGB4444ROW_LASX -#define HAS_ARGBTOUV444ROW_LASX -#define HAS_ARGBMULTIPLYROW_LASX -#define HAS_ARGBADDROW_LASX -#define HAS_ARGBSUBTRACTROW_LASX -#define HAS_ARGBATTENUATEROW_LASX -#define HAS_ARGBTORGB565DITHERROW_LASX -#define HAS_ARGBSHUFFLEROW_LASX -#define 
HAS_ARGBSHADEROW_LASX -#define HAS_ARGBGRAYROW_LASX -#define HAS_ARGBSEPIAROW_LASX -#define HAS_ARGB4444TOARGBROW_LASX -#define HAS_ARGB1555TOARGBROW_LASX -#define HAS_RGB565TOARGBROW_LASX -#define HAS_RGB24TOARGBROW_LASX -#define HAS_RAWTOARGBROW_LASX -#define HAS_ARGB1555TOYROW_LASX -#define HAS_ARGB1555TOUVROW_LASX -#define HAS_RGB565TOYROW_LASX -#define HAS_RGB565TOUVROW_LASX -#define HAS_RGB24TOYROW_LASX -#define HAS_RGB24TOUVROW_LASX -#define HAS_RAWTOYROW_LASX -#define HAS_RAWTOUVROW_LASX #define HAS_NV12TOARGBROW_LASX #define HAS_NV12TORGB565ROW_LASX #define HAS_NV21TOARGBROW_LASX -#define HAS_ARGBTOYJROW_LASX -#define HAS_ARGBTOUVJROW_LASX +#define HAS_RAWTOARGBROW_LASX +#define HAS_RAWTOUVROW_LASX +#define HAS_RAWTOYROW_LASX +#define HAS_RGB24TOARGBROW_LASX +#define HAS_RGB24TOUVROW_LASX +#define HAS_RGB24TOYROW_LASX +#define HAS_RGB565TOARGBROW_LASX +#define HAS_RGB565TOUVROW_LASX +#define HAS_RGB565TOYROW_LASX +#define HAS_UYVYTOUV422ROW_LASX +#define HAS_UYVYTOUVROW_LASX +#define HAS_UYVYTOYROW_LASX +#define HAS_YUY2TOUV422ROW_LASX +#define HAS_YUY2TOUVROW_LASX +#define HAS_YUY2TOYROW_LASX +#define HAS_RGBATOYROW_LASX +#define HAS_RGBATOYJROW_LASX +#define HAS_BGRATOYROW_LASX +#define HAS_RGB24TOYJROW_LASX +#define HAS_RAWTOYJROW_LASX +#endif + +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +#define HAS_COPYROW_RVV +#if __riscv_v_intrinsic == 11000 +#define HAS_AB64TOARGBROW_RVV +#define HAS_ABGRTOYJROW_RVV +#define HAS_ABGRTOYROW_RVV +#define HAS_AR64TOARGBROW_RVV +#define HAS_AR64TOAB64ROW_RVV +#define HAS_ARGBATTENUATEROW_RVV +#define HAS_ARGBBLENDROW_RVV +#define HAS_ARGBCOPYYTOALPHAROW_RVV +#define HAS_ARGBEXTRACTALPHAROW_RVV +#define HAS_ARGBTOAB64ROW_RVV +#define HAS_ARGBTOABGRROW_RVV +#define HAS_ARGBTOAR64ROW_RVV +#define HAS_ARGBTOBGRAROW_RVV +#define HAS_ARGBTORAWROW_RVV +#define HAS_ARGBTORGB24ROW_RVV +#define HAS_ARGBTORGBAROW_RVV +#define HAS_ARGBTOYJROW_RVV +#define HAS_ARGBTOYMATRIXROW_RVV +#define HAS_ARGBTOYROW_RVV 
+#define HAS_BGRATOYROW_RVV +#define HAS_BLENDPLANEROW_RVV +#define HAS_I400TOARGBROW_RVV +#define HAS_I422ALPHATOARGBROW_RVV +#define HAS_I422TOARGBROW_RVV +#define HAS_I422TORGB24ROW_RVV +#define HAS_I422TORGBAROW_RVV +#define HAS_I444ALPHATOARGBROW_RVV +#define HAS_I444TOARGBROW_RVV +#define HAS_I444TORGB24ROW_RVV +#define HAS_INTERPOLATEROW_RVV +#define HAS_J400TOARGBROW_RVV +#define HAS_MERGEARGBROW_RVV +#define HAS_MERGERGBROW_RVV +#define HAS_MERGEUVROW_RVV +#define HAS_MERGEXRGBROW_RVV +#define HAS_NV12TOARGBROW_RVV +#define HAS_NV12TORGB24ROW_RVV +#define HAS_NV21TOARGBROW_RVV +#define HAS_NV21TORGB24ROW_RVV +#define HAS_RAWTOARGBROW_RVV +#define HAS_RAWTORGB24ROW_RVV +#define HAS_RAWTORGBAROW_RVV +#define HAS_RAWTOYJROW_RVV +#define HAS_RAWTOYROW_RVV +#define HAS_RGB24TOARGBROW_RVV +#define HAS_RGB24TOYJROW_RVV +#define HAS_RGB24TOYROW_RVV +#define HAS_RGBATOARGBROW_RVV +#define HAS_RGBATOYJROW_RVV +#define HAS_RGBATOYMATRIXROW_RVV +#define HAS_RGBATOYROW_RVV +#define HAS_RGBTOYMATRIXROW_RVV +#define HAS_SPLITARGBROW_RVV +#define HAS_SPLITRGBROW_RVV +#define HAS_SPLITUVROW_RVV +#define HAS_SPLITXRGBROW_RVV +#endif #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -789,8 +914,8 @@ typedef uint32_t ulvec32[8]; typedef uint8_t ulvec8[32]; #endif -#if defined(__aarch64__) || defined(__arm__) -// This struct is for ARM color conversion. +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) +// This struct is for ARM and RISC-V color conversion. 
struct YuvConstants { uvec8 kUVCoeff; vec16 kRGBCoeffBias; @@ -816,13 +941,13 @@ struct YuvConstants { #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) -#define align_buffer_64(var, size) \ - uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \ - uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ +#define align_buffer_64(var, size) \ + void* var##_mem = malloc((size) + 63); /* NOLINT */ \ + uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */ #define free_aligned_buffer_64(var) \ free(var##_mem); \ - var = 0 + var = NULL #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP @@ -894,6 +1019,12 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -981,6 +1112,50 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_RVV(const uint8_t* src_y, + 
const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void I444ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1000,6 +1175,12 @@ void I422ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1012,6 +1193,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToRGBARow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1025,6 +1212,13 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1038,6 +1232,12 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB24Row_LSX(const 
uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB24Row_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1050,6 +1250,12 @@ void I422ToRGB565Row_MSA(const uint8_t* src_y, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB565Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB565Row_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1062,6 +1268,12 @@ void I422ToARGB4444Row_MSA(const uint8_t* src_y, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB4444Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB4444Row_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1074,6 +1286,12 @@ void I422ToARGB1555Row_MSA(const uint8_t* src_y, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB1555Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB1555Row_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1142,15 +1360,39 @@ void UYVYToARGBRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void NV12ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* 
src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width); void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void ABGRToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGBAToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); @@ -1164,13 +1406,23 @@ void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width); void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width); void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); -void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); -void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void ARGBToYJRow_NEON(const uint8_t* src_argb, 
uint8_t* dst_yj, int width); +void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width); +void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width); +void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width); +void ABGRToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width); +void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width); void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width); +void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width); void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width); +void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width); void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -1189,11 +1441,20 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ARGBToUVRow_LSX(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVRow_LASX(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ARGBToUV444Row_LSX(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUV444Row_LASX(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -1203,6 +1464,11 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_NEON(const 
uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_uj, + uint8_t* dst_vj, + int width); void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, @@ -1258,6 +1524,11 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void BGRAToUVRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, @@ -1372,6 +1643,13 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width); +void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width); void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); @@ -1384,6 +1662,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width); void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width); void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width); +void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width); void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555, uint8_t* dst_y, int width); @@ -1393,9 +1673,15 @@ void RGB24ToYRow_LSX(const 
uint8_t* src_rgb24, uint8_t* dst_y, int width); void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width); void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width); +void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width); void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); @@ -1409,6 +1695,7 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1423,6 +1710,7 @@ void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void 
ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1453,10 +1741,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1465,7 +1758,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int 
width); void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1485,6 +1785,11 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_AVX2(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVRow_SSSE3(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -1495,6 +1800,11 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void BGRAToUVRow_SSSE3(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, @@ -1525,6 +1835,11 @@ void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, @@ -1535,6 +1850,11 @@ void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, @@ -1568,11 +1888,20 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void 
ARGBToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void ARGBToUV444Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -1582,6 +1911,11 @@ void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, @@ -1747,16 +2081,16 @@ void ARGBToUVJRow_C(const uint8_t* src_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ABGRToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_C(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void BGRAToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, @@ -1772,6 +2106,11 @@ void RGBAToUVRow_C(const uint8_t* src_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); +void RGBAToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void RGB24ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, @@ -1826,6 +2165,7 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void 
MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1833,17 +2173,20 @@ void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorSplitUVRow_SSSE3(const uint8_t* src, @@ -1867,10 +2210,13 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_v, int width); +void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width); + void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void 
ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, @@ -1883,6 +2229,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBMirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1925,6 +2272,10 @@ void SplitUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); +void SplitUVRow_RVV(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -1949,7 +2300,6 @@ void DetileRow_C(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width); - void DetileRow_NEON(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, @@ -1966,6 +2316,42 @@ void DetileRow_Any_SSE2(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width); +void DetileRow_AVX(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_Any_AVX(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_16_C(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_Any_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_SSE2(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void 
DetileRow_16_Any_SSE2(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_AVX(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); +void DetileRow_16_Any_AVX(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width); void DetileSplitUVRow_C(const uint8_t* src_uv, ptrdiff_t src_tile_stride, uint8_t* dst_u, @@ -1991,6 +2377,38 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); +void DetileToYUY2_C(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); +void DetileToYUY2_SSE2(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); +void DetileToYUY2_Any_SSE2(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); +void DetileToYUY2_NEON(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); +void DetileToYUY2_Any_NEON(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width); +void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size); +void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size); void MergeUVRow_C(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, @@ -2003,6 +2421,10 @@ void MergeUVRow_AVX2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); +void MergeUVRow_AVX512BW(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); void MergeUVRow_NEON(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, @@ -2015,6 +2437,10 @@ void MergeUVRow_LSX(const uint8_t* src_u, const uint8_t* src_v, uint8_t* 
dst_uv, int width); +void MergeUVRow_RVV(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); void MergeUVRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -2023,6 +2449,10 @@ void MergeUVRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void MergeUVRow_Any_AVX512BW(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); void MergeUVRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -2079,6 +2509,11 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb, uint8_t* dst_g, uint8_t* dst_b, int width); +void SplitRGBRow_RVV(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, @@ -2105,6 +2540,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r, const uint8_t* src_b, uint8_t* dst_rgb, int width); +void MergeRGBRow_RVV(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width); void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2139,6 +2579,12 @@ void MergeARGBRow_NEON(const uint8_t* src_r, const uint8_t* src_a, uint8_t* dst_argb, int width); +void MergeARGBRow_RVV(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); void MergeARGBRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2187,6 +2633,12 @@ void SplitARGBRow_NEON(const uint8_t* src_rgba, uint8_t* dst_b, uint8_t* dst_a, int width); +void SplitARGBRow_RVV(const uint8_t* src_rgba, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, @@ -2231,6 +2683,11 @@ void MergeXRGBRow_NEON(const uint8_t* src_r, const uint8_t* src_b, uint8_t* dst_argb, int width); 
+void MergeXRGBRow_RVV(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2271,6 +2728,11 @@ void SplitXRGBRow_NEON(const uint8_t* src_rgba, uint8_t* dst_g, uint8_t* dst_b, int width); +void SplitXRGBRow_RVV(const uint8_t* src_rgba, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, @@ -2604,8 +3066,8 @@ void Convert16To8Row_NEON(const uint16_t* src_y, uint8_t* dst_y, int scale, int width); -void Convert16To8Row_Any_NEON(const uint16_t* src_y, - uint8_t* dst_y, +void Convert16To8Row_Any_NEON(const uint16_t* src_ptr, + uint8_t* dst_ptr, int scale, int width); @@ -2614,6 +3076,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width); void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width); void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width); void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count); +void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int count); void CopyRow_C(const uint8_t* src, uint8_t* dst, int count); void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -2647,6 +3110,9 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb, uint8_t* dst_a, int width); +void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb, + uint8_t* dst_a, + int width); void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -2666,6 +3132,7 @@ void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr, void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int 
width); +void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -2713,6 +3180,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); +void ARGBShuffleRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); void ARGBShuffleRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, @@ -2733,6 +3204,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); +void ARGBShuffleRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, @@ -2765,14 +3240,18 @@ void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_NEON(const 
uint8_t* src_rgb565, uint8_t* dst_argb, int width); @@ -2932,15 +3411,15 @@ void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -2968,7 +3447,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, int width); void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -2981,23 +3460,44 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, int width); void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, + int width); +void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb, + uint8_t* dst_rgb, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); +void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_LSX(const uint8_t* src_argb, + uint8_t* 
dst_rgb, + int width); void ARGBToARGB1555Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB4444Row_LSX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width); +void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width); +void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width); +void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width); + +void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width); +void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -3011,6 +3511,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width); void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width); +void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width); void AR64ShuffleRow_C(const uint8_t* src_ar64, uint8_t* dst_ar64, const uint8_t* shuffler, @@ -3035,6 +3537,12 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width); void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void 
ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width); +void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width); void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, uint16_t* dst_ptr, int width); @@ -3077,6 +3585,7 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -3096,6 +3605,12 @@ void I444ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -3290,6 +3805,18 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void I444ToRGB24Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); 
void I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -3631,12 +4158,24 @@ void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -3823,13 +4362,13 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToYUV24Row_Any_SSSE3(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_yuv24, +void NV21ToYUV24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); -void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_yuv24, +void NV21ToYUV24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, @@ -3976,6 +4515,10 @@ void I400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I400ToARGBRow_RVV(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* param, @@ -4014,6 +4557,10 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); +void 
ARGBBlendRow_RVV(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); void ARGBBlendRow_C(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, @@ -4040,6 +4587,11 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); +void BlendPlaneRow_RVV(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width); void BlendPlaneRow_C(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, @@ -4084,10 +4636,18 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void ARGBMultiplyRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); void ARGBMultiplyRow_LASX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); +void ARGBMultiplyRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -4130,10 +4690,18 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void ARGBAddRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); void ARGBAddRow_LASX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); +void ARGBAddRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); void ARGBAddRow_Any_LASX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -4177,10 +4745,18 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void ARGBSubtractRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); void ARGBSubtractRow_LASX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); +void 
ARGBSubtractRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -4273,21 +4849,37 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); +void ARGBToRGB565DitherRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); - +void ARGBToRGB24Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRAWRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToARGB1555Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToARGB4444Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4298,6 +4890,12 @@ void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4443,6 +5041,12 @@ void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void 
I422ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4455,6 +5059,12 @@ void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGBARow_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4468,6 +5078,13 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4481,6 +5098,12 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB24Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4493,6 +5116,12 @@ void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB565Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4505,6 +5134,12 @@ void 
I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB4444Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4517,6 +5152,12 @@ void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB1555Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4592,6 +5233,10 @@ void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, @@ -4602,6 +5247,10 @@ void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, @@ -4612,17 +5261,27 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, int 
src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUVRow_LSX(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, @@ -4632,6 +5291,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, @@ -4642,6 +5305,10 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_C(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, @@ -4652,6 +5319,10 @@ void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_Any_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4662,6 +5333,10 @@ void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_Any_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4672,17 +5347,27 @@ void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToNVUVRow_Any_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width); void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, 
uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -4692,6 +5377,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void YUY2ToUV422Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4737,12 +5426,18 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, uint8_t* dst_v, int width); void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_MSA(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUVRow_LSX(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUVRow_LASX(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, @@ -4752,6 +5447,10 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUV422Row_LSX(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUV422Row_LASX(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, @@ -4798,12 +5497,18 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_v, int width); void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, int 
src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -4813,6 +5518,10 @@ void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void UYVYToUV422Row_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4927,6 +5636,11 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_yuy2, int width); +void I422ToYUY2Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); void I422ToYUY2Row_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4937,6 +5651,11 @@ void I422ToUYVYRow_MSA(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_uyvy, int width); +void I422ToUYVYRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); void I422ToUYVYRow_LASX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4947,6 +5666,11 @@ void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); +void I422ToYUY2Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4957,6 +5681,11 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); +void I422ToUYVYRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4977,9 +5706,15 @@ void 
ARGBAttenuateRow_NEON(const uint8_t* src_argb, void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBAttenuateRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); void ARGBAttenuateRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBAttenuateRow_RVV(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4992,6 +5727,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBAttenuateRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -5018,12 +5756,14 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); +void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width); void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width); void ARGBColorMatrixRow_C(const uint8_t* src_argb, @@ -5103,6 +5843,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); +void ARGBShadeRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); void ARGBShadeRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width, @@ -5175,6 +5919,11 @@ void 
InterpolateRow_LSX(uint8_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); +void InterpolateRow_RVV(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); void InterpolateRow_Any_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, @@ -5482,7 +6231,19 @@ void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, float* dst_ptr, float param, int width); - +// Convert FP16 Half Floats to FP32 Floats +void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16 + float* dst, + int width); +// Convert a column of FP16 Half Floats to a row of FP32 Floats +void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16 + int src_stride, // stride in elements + float* dst, + int width); +// Convert FP32 Floats to FP16 Half Floats +void ConvertFP32ToFP16Row_NEON(const float* src, + uint16_t* dst, // fp16 + int width); void ARGBLumaColorTableRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width, @@ -5526,6 +6287,17 @@ void GaussCol_F32_C(const float* src0, float* dst, int width); +void GaussRow_C(const uint32_t* src, uint16_t* dst, int width); +void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width); + +void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/include/libyuv/scale.h b/include/libyuv/scale.h index 443f89c2..bfe4a344 100644 --- a/files/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -27,39 +27,40 @@ typedef enum FilterMode { } FilterModeEnum; // Scale a YUV plane. +// Returns 0 if successful. 
LIBYUV_API -void ScalePlane(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); LIBYUV_API -void ScalePlane_16(const uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); // Sample is expected to be in the low 12 bits. LIBYUV_API -void ScalePlane_12(const uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering); +int ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the // dst width and height. diff --git a/files/include/libyuv/scale_argb.h b/include/libyuv/scale_argb.h index 7641f18e..7641f18e 100644 --- a/files/include/libyuv/scale_argb.h +++ b/include/libyuv/scale_argb.h diff --git a/files/include/libyuv/scale_rgb.h b/include/libyuv/scale_rgb.h index d17c39fd..d17c39fd 100644 --- a/files/include/libyuv/scale_rgb.h +++ b/include/libyuv/scale_rgb.h diff --git a/files/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 6cb5e128..02ed61ca 100644 --- a/files/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -29,7 +29,10 @@ extern "C" { #endif // MemorySanitizer does not support assembly code yet. 
http://crbug.com/344505 #if defined(__has_feature) -#if __has_feature(memory_sanitizer) +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON) +#define LIBYUV_DISABLE_NEON +#endif +#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86) #define LIBYUV_DISABLE_X86 #endif #endif @@ -133,6 +136,8 @@ extern "C" { #define HAS_SCALEROWDOWN34_NEON #define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEROWDOWN4_NEON +#define HAS_SCALEUVROWDOWN2_NEON +#define HAS_SCALEUVROWDOWN2LINEAR_NEON #define HAS_SCALEUVROWDOWN2BOX_NEON #define HAS_SCALEUVROWDOWNEVEN_NEON #define HAS_SCALEROWUP2_LINEAR_NEON @@ -173,6 +178,38 @@ extern "C" { #define HAS_SCALEROWDOWN34_LSX #endif +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) +#define HAS_SCALEADDROW_RVV +// TODO: Test ScaleARGBRowDownEven_RVV and enable it +// #define HAS_SCALEARGBROWDOWNEVEN_RVV +#define HAS_SCALEUVROWDOWN4_RVV +#define HAS_SCALEUVROWDOWNEVEN_RVV +#if __riscv_v_intrinsic == 11000 +#define HAS_SCALEARGBROWDOWN2_RVV +#define HAS_SCALEARGBROWDOWN2BOX_RVV +#define HAS_SCALEARGBROWDOWN2LINEAR_RVV +#define HAS_SCALEARGBROWDOWNEVENBOX_RVV +#define HAS_SCALEROWDOWN2_RVV +#define HAS_SCALEROWDOWN2BOX_RVV +#define HAS_SCALEROWDOWN2LINEAR_RVV +#define HAS_SCALEROWDOWN34_0_BOX_RVV +#define HAS_SCALEROWDOWN34_1_BOX_RVV +#define HAS_SCALEROWDOWN34_RVV +#define HAS_SCALEROWDOWN38_2_BOX_RVV +#define HAS_SCALEROWDOWN38_3_BOX_RVV +#define HAS_SCALEROWDOWN38_RVV +#define HAS_SCALEROWDOWN4_RVV +#define HAS_SCALEROWDOWN4BOX_RVV +#define HAS_SCALEROWUP2_BILINEAR_RVV +#define HAS_SCALEROWUP2_LINEAR_RVV +#define HAS_SCALEUVROWDOWN2_RVV +#define HAS_SCALEUVROWDOWN2BOX_RVV +#define HAS_SCALEUVROWDOWN2LINEAR_RVV +#define HAS_SCALEUVROWUP2_BILINEAR_RVV +#define HAS_SCALEUVROWUP2_LINEAR_RVV +#endif +#endif + // Scale ARGB vertically with bilinear interpolation. 
void ScalePlaneVertical(int src_height, int dst_width, @@ -214,6 +251,17 @@ void ScalePlaneVertical_16To8(int src_height, int scale, enum FilterMode filtering); +void ScalePlaneDown2_16To8(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint8_t* dst_ptr, + int scale, + enum FilterMode filtering); + // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, @@ -259,6 +307,16 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); +void ScaleRowDown2_16To8_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale); +void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale); void ScaleRowDown2Linear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -267,6 +325,16 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); +void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale); +void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale); void ScaleRowDown2Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -279,6 +347,16 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); +void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale); +void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale); void ScaleRowDown4_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -906,6 +984,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t 
src_stride, uint8_t* dst, int dst_width); +void ScaleARGBRowDown2_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, @@ -1018,6 +1108,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb, int src_stepx, uint8_t* dst_argb, int dst_width); +void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, @@ -1100,6 +1200,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width); +void ScaleUVRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, @@ -1160,6 +1272,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr, int src_stepx, uint8_t* dst_uv, int dst_width); +void ScaleUVRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); void ScaleUVRowDownEven_MSA(const uint8_t* 
src_ptr, ptrdiff_t src_stride, int32_t src_stepx, @@ -1249,6 +1371,14 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width); +void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width); @@ -1701,6 +1831,61 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width); +void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); + +void ScaleRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowUp2_Linear_RVV(const 
uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/include/libyuv/scale_uv.h b/include/libyuv/scale_uv.h index 8e74e319..8e74e319 100644 --- a/files/include/libyuv/scale_uv.h +++ b/include/libyuv/scale_uv.h diff --git a/files/include/libyuv/version.h b/include/libyuv/version.h index a85be048..a9c54400 100644 --- a/files/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1837 +#define LIBYUV_VERSION 1883 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/files/include/libyuv/video_common.h b/include/libyuv/video_common.h index 32b8a521..32b8a521 100644 --- a/files/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h diff --git a/infra/config/OWNERS b/infra/config/OWNERS new file mode 100644 index 00000000..2c4f90a0 --- /dev/null +++ b/infra/config/OWNERS @@ -0,0 +1,3 @@ +fbarchard@chromium.org +mbonadei@chromium.org +jansson@google.com diff --git a/files/infra/config/PRESUBMIT.py b/infra/config/PRESUBMIT.py index 01ec0eed..f79e08ad 100644 --- a/files/infra/config/PRESUBMIT.py +++ b/infra/config/PRESUBMIT.py @@ -2,6 +2,8 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. 
+USE_PYTHON3 = True + def CheckChangeOnUpload(input_api, output_api): return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api) diff --git a/files/infra/config/README.md b/infra/config/README.md index e5e3b5f8..e5e3b5f8 100644 --- a/files/infra/config/README.md +++ b/infra/config/README.md diff --git a/files/infra/config/codereview.settings b/infra/config/codereview.settings index 6d742273..6d742273 100644 --- a/files/infra/config/codereview.settings +++ b/infra/config/codereview.settings diff --git a/files/infra/config/commit-queue.cfg b/infra/config/commit-queue.cfg index 4a8d77f4..4a8d77f4 100644 --- a/files/infra/config/commit-queue.cfg +++ b/infra/config/commit-queue.cfg diff --git a/files/infra/config/cr-buildbucket.cfg b/infra/config/cr-buildbucket.cfg index 061cf33b..7415851b 100644 --- a/files/infra/config/cr-buildbucket.cfg +++ b/infra/config/cr-buildbucket.cfg @@ -29,10 +29,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -60,10 +59,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -91,10 +89,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -111,7 +108,7 @@ buckets { name: 
"Android Tester ARM32 Debug (Nexus 5X)" swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" - dimensions: "device_type:bullhead" + dimensions: "device_type:walleye" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -120,9 +117,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -139,7 +136,7 @@ buckets { name: "Android Tester ARM32 Release (Nexus 5X)" swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" - dimensions: "device_type:bullhead" + dimensions: "device_type:walleye" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -148,9 +145,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -167,7 +164,7 @@ buckets { name: "Android Tester ARM64 Debug (Nexus 5X)" swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" - dimensions: "device_type:bullhead" + dimensions: "device_type:walleye" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -176,9 +173,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": 
"client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -206,10 +203,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -237,10 +233,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -268,10 +263,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -299,10 +293,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -330,10 +323,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -361,10 +353,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": 
{' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -392,10 +383,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -423,10 +413,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -454,10 +443,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -485,10 +473,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -516,10 +503,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -537,7 +523,7 @@ buckets { swarming_host: 
"chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -546,9 +532,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -566,7 +552,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -575,9 +561,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -595,7 +581,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -604,9 +590,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -634,10 +620,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": 
true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -665,10 +650,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -696,10 +680,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -727,10 +710,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -758,10 +740,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -789,10 +770,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' 
' "recipe": "libyuv/libyuv"' @@ -820,10 +800,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -851,10 +830,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -872,7 +850,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -881,9 +859,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -901,7 +879,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -910,9 +888,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-trusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": 
"client.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -985,7 +963,7 @@ buckets { name: "android" swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" - dimensions: "device_type:bullhead" + dimensions: "device_type:walleye" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -994,9 +972,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1013,7 +991,7 @@ buckets { name: "android_arm64" swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" - dimensions: "device_type:bullhead" + dimensions: "device_type:walleye" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1022,9 +1000,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1041,7 +1019,7 @@ buckets { name: "android_rel" swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" - dimensions: "device_type:bullhead" + dimensions: "device_type:walleye" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1050,9 +1028,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' 
"builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1080,10 +1058,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1111,10 +1088,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1132,7 +1108,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1141,9 +1117,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1161,7 +1137,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1170,9 +1146,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' 
"metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1200,10 +1176,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1231,10 +1206,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1262,10 +1236,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1293,10 +1266,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1324,10 +1296,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1355,10 +1326,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' 
"enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1386,10 +1356,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1417,10 +1386,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1438,7 +1406,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1447,9 +1415,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1467,7 +1435,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1476,9 
+1444,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1496,7 +1464,7 @@ buckets { swarming_host: "chromium-swarm.appspot.com" swarming_tags: "vpython:native-python-wrapper" dimensions: "cpu:x86-64" - dimensions: "os:Mac-10.15" + dimensions: "os:Mac-12" dimensions: "pool:luci.flex.try" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -1505,9 +1473,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1535,10 +1503,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": true,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "run_presubmit",' @@ -1568,10 +1535,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1599,10 +1565,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' 
"builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1630,10 +1595,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1661,10 +1625,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1692,10 +1655,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' @@ -1723,10 +1685,9 @@ buckets { } properties: '{' - ' "$build/goma": {' - ' "enable_ats": false,' - ' "server_host": "goma.chromium.org",' - ' "use_luci_auth": true' + ' "$build/reclient": {' + ' "instance": "rbe-webrtc-untrusted",' + ' "metrics_project": "chromium-reclient-metrics"' ' },' ' "builder_group": "tryserver.libyuv",' ' "recipe": "libyuv/libyuv"' diff --git a/files/infra/config/luci-logdog.cfg b/infra/config/luci-logdog.cfg index adc75bef..adc75bef 100644 --- a/files/infra/config/luci-logdog.cfg +++ b/infra/config/luci-logdog.cfg diff --git a/files/infra/config/luci-milo.cfg b/infra/config/luci-milo.cfg index baf786f2..baf786f2 100644 --- a/files/infra/config/luci-milo.cfg +++ b/infra/config/luci-milo.cfg diff --git a/files/infra/config/luci-scheduler.cfg b/infra/config/luci-scheduler.cfg index 0ec5dd0e..0ec5dd0e 100644 --- 
a/files/infra/config/luci-scheduler.cfg +++ b/infra/config/luci-scheduler.cfg diff --git a/files/infra/config/main.star b/infra/config/main.star index b922ca02..e83afe4f 100755 --- a/files/infra/config/main.star +++ b/infra/config/main.star @@ -8,22 +8,14 @@ lucicfg.check_version("1.30.9") LIBYUV_GIT = "https://chromium.googlesource.com/libyuv/libyuv" LIBYUV_GERRIT = "https://chromium-review.googlesource.com/libyuv/libyuv" -GOMA_BACKEND_RBE_PROD = { - "server_host": "goma.chromium.org", - "use_luci_auth": True, +RECLIENT_CI = { + "instance": "rbe-webrtc-trusted", + "metrics_project": "chromium-reclient-metrics", } -GOMA_BACKEND_RBE_ATS_PROD = { - "server_host": "goma.chromium.org", - "use_luci_auth": True, - "enable_ats": True, -} - -# Disable ATS on Windows CQ/try. -GOMA_BACKEND_RBE_NO_ATS_PROD = { - "server_host": "goma.chromium.org", - "use_luci_auth": True, - "enable_ats": False, +RECLIENT_CQ = { + "instance": "rbe-webrtc-untrusted", + "metrics_project": "chromium-reclient-metrics", } # Use LUCI Scheduler BBv2 names and add Scheduler realms configs. @@ -70,6 +62,10 @@ luci.project( ], bindings = [ luci.binding( + roles = "role/swarming.taskTriggerer", # for LED tasks. 
+ groups = "project-libyuv-admins", + ), + luci.binding( roles = "role/configs.validator", users = "libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com", ), @@ -195,28 +191,15 @@ luci.bucket( def get_os_dimensions(os): if os == "android": - return {"device_type": "bullhead"} + return {"device_type": "walleye"} if os == "ios" or os == "mac": - return {"os": "Mac-10.15", "cpu": "x86-64"} + return {"os": "Mac-12", "cpu": "x86-64"} elif os == "win": return {"os": "Windows-10", "cores": "8", "cpu": "x86-64"} elif os == "linux": return {"os": "Ubuntu-18.04", "cores": "8", "cpu": "x86-64"} return {} -def get_os_properties(os, try_builder = False): - if os == "android": - return {"$build/goma": GOMA_BACKEND_RBE_PROD} - elif os in ("ios", "mac"): - return {"$build/goma": GOMA_BACKEND_RBE_PROD} - elif os == "win" and try_builder: - return {"$build/goma": GOMA_BACKEND_RBE_NO_ATS_PROD} - elif os == "win": - return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD} - elif os == "linux": - return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD} - return {} - def libyuv_ci_builder(name, dimensions, properties, triggered_by): return luci.builder( name = name, @@ -254,7 +237,7 @@ def libyuv_try_builder(name, dimensions, properties, recipe_name = "libyuv/libyu def ci_builder(name, os, category, short_name = None): dimensions = get_os_dimensions(os) - properties = get_os_properties(os) + properties = {"$build/reclient": RECLIENT_CI} dimensions["pool"] = "luci.flex.ci" properties["builder_group"] = "client.libyuv" @@ -265,7 +248,7 @@ def ci_builder(name, os, category, short_name = None): def try_builder(name, os, experiment_percentage = None): dimensions = get_os_dimensions(os) - properties = get_os_properties(os, try_builder = True) + properties = {"$build/reclient": RECLIENT_CQ} dimensions["pool"] = "luci.flex.try" properties["builder_group"] = "tryserver.libyuv" diff --git a/files/infra/config/project.cfg b/infra/config/project.cfg index 700226ad..3c327118 100644 --- 
a/files/infra/config/project.cfg +++ b/infra/config/project.cfg @@ -7,7 +7,7 @@ name: "libyuv" access: "group:all" lucicfg { - version: "1.30.9" + version: "1.39.14" package_dir: "." config_dir: "." entry_point: "main.star" diff --git a/files/infra/config/realms.cfg b/infra/config/realms.cfg index ae04529e..16ffaac9 100644 --- a/files/infra/config/realms.cfg +++ b/infra/config/realms.cfg @@ -38,6 +38,10 @@ realms { role: "role/scheduler.reader" principals: "group:all" } + bindings { + role: "role/swarming.taskTriggerer" + principals: "group:project-libyuv-admins" + } } realms { name: "ci" diff --git a/files/libyuv.gni b/libyuv.gni index 8df40ba2..343160c3 100644 --- a/files/libyuv.gni +++ b/libyuv.gni @@ -6,13 +6,15 @@ # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. -import("//build_overrides/build.gni") import("//build/config/arm.gni") +import("//build/config/loongarch64.gni") import("//build/config/mips.gni") +import("//build_overrides/build.gni") declare_args() { libyuv_include_tests = !build_with_chromium libyuv_disable_jpeg = false + libyuv_disable_rvv = false libyuv_use_neon = current_cpu == "arm64" || (current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)) @@ -20,4 +22,8 @@ declare_args() { (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa libyuv_use_mmi = (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi + libyuv_use_lsx = + (current_cpu == "loong64") && loongarch64_use_lsx + libyuv_use_lasx = + (current_cpu == "loong64") && loongarch64_use_lasx } diff --git a/files/libyuv.gyp b/libyuv.gyp index f73a1a4b..f73a1a4b 100644 --- a/files/libyuv.gyp +++ b/libyuv.gyp diff --git a/files/libyuv.gypi b/libyuv.gypi index 48936aa7..48936aa7 100644 --- a/files/libyuv.gypi +++ b/libyuv.gypi diff --git a/files/linux.mk b/linux.mk index b541b47c..d19a888a 100644 --- a/files/linux.mk +++ b/linux.mk @@ -33,6 +33,7 @@ LOCAL_OBJ_FILES := \ 
source/rotate_argb.o \ source/rotate_common.o \ source/rotate_gcc.o \ + source/rotate_lsx.o \ source/rotate_msa.o \ source/rotate_neon.o \ source/rotate_neon64.o \ @@ -40,19 +41,24 @@ LOCAL_OBJ_FILES := \ source/row_any.o \ source/row_common.o \ source/row_gcc.o \ + source/row_lasx.o \ + source/row_lsx.o \ source/row_msa.o \ source/row_neon.o \ source/row_neon64.o \ + source/row_rvv.o \ source/row_win.o \ source/scale.o \ source/scale_any.o \ source/scale_argb.o \ source/scale_common.o \ source/scale_gcc.o \ + source/scale_lsx.o \ source/scale_msa.o \ source/scale_neon.o \ source/scale_neon64.o \ source/scale_rgb.o \ + source/scale_rvv.o \ source/scale_uv.o \ source/scale_win.o \ source/video_common.o @@ -3,7 +3,7 @@ # Note that dependencies on NDK are not directly listed since NDK auto adds # them. -LIBYUV_INCLUDES := $(LIBYUV_PATH)/files/include +LIBYUV_INCLUDES := $(LIBYUV_PATH)/include LIBYUV_C_FLAGS := diff --git a/files/pylintrc b/pylintrc index b8bea334..b8bea334 100644 --- a/files/pylintrc +++ b/pylintrc diff --git a/riscv_script/prepare_toolchain_qemu.sh b/riscv_script/prepare_toolchain_qemu.sh new file mode 100755 index 00000000..2a901739 --- /dev/null +++ b/riscv_script/prepare_toolchain_qemu.sh @@ -0,0 +1,74 @@ +#!/bin/bash +set -ev + +# Download & build RISC-V Clang toolchain & QEMU emulator. +# RISC-V Clang is for cross compile with the RISC-V Vector ISA. +# RISC-V QEMU is used to run the test suite. +# +# Requirements: Linux host w/ working C++ compiler, git, cmake, ninja, wget, tar + +# NOTE: this script must be run from the top-level directory of the LIBYUV_SRC_DIR. 
+ +RISCV_TRIPLE="riscv64-unknown-linux-gnu" +RISCV_QEMU="qemu-riscv64" + +LIBYUV_SRC_DIR=$(pwd) +BUILD_DIR="$LIBYUV_SRC_DIR"/build-toolchain-qemu +INSTALL_QEMU="$BUILD_DIR"/riscv-qemu +INSTALL_CLANG="$BUILD_DIR"/riscv-clang + +LLVM_VERSION="16.0.0" +LLVM_NAME=llvm-project-"$LLVM_VERSION".src + +RISCV_GNU_TOOLCHAIN="$BUILD_DIR"/riscv-gnu-toolchain +RISCV_CLANG_TOOLCHAIN="$BUILD_DIR"/"$LLVM_NAME" + +QEMU_NAME="qemu-7.0.0" + +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +# Download and install RISC-V GNU Toolchain (needed to build Clang) +if [ ! -d "$RISCV_GNU_TOOLCHAIN" ] +then + git clone git@github.com:riscv/riscv-gnu-toolchain.git + pushd "$RISCV_GNU_TOOLCHAIN" + git submodule update --init --recursive + ./configure --with-cmodel=medany --prefix="$INSTALL_CLANG" + ionice nice make linux -j `nproc` install + popd +fi + +# Download Clang toolchain & build cross compiler +if [ ! -d "$RISCV_CLANG_TOOLCHAIN" ] +then + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-"$LLVM_VERSION"/"$LLVM_NAME".tar.xz + tar xvJf "$LLVM_NAME".tar.xz + pushd "$RISCV_CLANG_TOOLCHAIN" + cmake -DCMAKE_INSTALL_PREFIX="$INSTALL_CLANG" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_TARGETS_TO_BUILD="RISCV" \ + -DLLVM_ENABLE_PROJECTS="clang" \ + -DLLVM_DEFAULT_TARGET_TRIPLE="$RISCV_TRIPLE" \ + -DLLVM_INSTALL_TOOLCHAIN_ONLY=On \ + -DDEFAULT_SYSROOT=../sysroot \ + -G "Ninja" "$RISCV_CLANG_TOOLCHAIN"/llvm + ionice nice ninja -j `nproc` + ionice nice ninja -j `nproc` install + popd + pushd "$INSTALL_CLANG"/bin + ln -sf clang "$RISCV_TRIPLE"-clang + ln -sf clang++ "$RISCV_TRIPLE"-clang++ + popd +fi + +# Download QEMU and build the riscv64 Linux usermode emulator +if [ ! 
-d "$QEMU_NAME" ] +then + wget https://download.qemu.org/"$QEMU_NAME".tar.xz + tar xvJf "$QEMU_NAME".tar.xz + pushd "$QEMU_NAME" + ./configure --target-list=riscv64-linux-user --prefix="$INSTALL_QEMU" + ionice nice make -j `nproc` install + popd +fi diff --git a/riscv_script/riscv-clang.cmake b/riscv_script/riscv-clang.cmake new file mode 100644 index 00000000..e287941f --- /dev/null +++ b/riscv_script/riscv-clang.cmake @@ -0,0 +1,55 @@ +set(CMAKE_CROSSCOMPILING TRUE) +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_PROCESSOR "riscv64") + +option(USE_RVV "Enable riscv vector or not." ON) +option(USE_AUTO_VECTORIZER "Enable riscv auto vectorizer or not." OFF) + +# Avoid to use system path for cross-compile +set(CMAKE_FIND_USE_CMAKE_SYSTEM_PATH FALSE) + +set(TOOLCHAIN_PATH "" CACHE STRING "The toolcahin path.") +if(NOT TOOLCHAIN_PATH) + set(TOOLCHAIN_PATH ${CMAKE_SOURCE_DIR}/build-toolchain-qemu/riscv-clang) +endif() + +set(TOOLCHAIN_PREFIX "riscv64-unknown-linux-gnu-" CACHE STRING "The toolcahin prefix.") + +# toolchain setting +set(CMAKE_C_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang") +set(CMAKE_CXX_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang++") + +# CMake will just use the host-side tools for the following tools, so we setup them here. +set(CMAKE_C_COMPILER_AR "${TOOLCHAIN_PATH}/bin/llvm-ar") +set(CMAKE_CXX_COMPILER_AR "${TOOLCHAIN_PATH}/bin/llvm-ar") +set(CMAKE_C_COMPILER_RANLIB "${TOOLCHAIN_PATH}/bin/llvm-ranlib") +set(CMAKE_CXX_COMPILER_RANLIB "${TOOLCHAIN_PATH}/bin/llvm-ranlib") +set(CMAKE_OBJDUMP "${TOOLCHAIN_PATH}/bin/llvm-objdump") +set(CMAKE_OBJCOPY "${TOOLCHAIN_PATH}/bin/llvm-objcopy") + +# compile options +set(RISCV_COMPILER_FLAGS "" CACHE STRING "Compile flags") +# if user provides RISCV_COMPILER_FLAGS, appeding compile flags is avoided. 
+if(RISCV_COMPILER_FLAGS STREQUAL "") + message(STATUS "USE_RVV: ${USE_RVV}") + message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}") + if(USE_RVV) + list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv") + if(NOT USE_AUTO_VECTORIZER) + # Disable auto-vectorizer + add_compile_options(-fno-vectorize -fno-slp-vectorize) + endif() + else() + list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc") + endif() +endif() +message(STATUS "RISCV_COMPILER_FLAGS: ${RISCV_COMPILER_FLAGS}") + +set(CMAKE_C_FLAGS "${RISCV_COMPILER_FLAGS} ${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${RISCV_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}") + +set(RISCV_LINKER_FLAGS "-lstdc++ -lpthread -lm -ldl") +set(RISCV_LINKER_FLAGS_EXE) +set(CMAKE_SHARED_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}") +set(CMAKE_MODULE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}") +set(CMAKE_EXE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${RISCV_LINKER_FLAGS_EXE} ${CMAKE_EXE_LINKER_FLAGS}") diff --git a/riscv_script/run_qemu.sh b/riscv_script/run_qemu.sh new file mode 100755 index 00000000..080af3b1 --- /dev/null +++ b/riscv_script/run_qemu.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -x +set -e + +USE_RVV="${USE_RVV:-OFF}" +TOOLCHAIN_PATH="${TOOLCHAIN_PATH:-../../build-toolchain-qemu/riscv-clang}" +QEMU_PREFIX_PATH="${QEMU_PREFIX_PATH:-../../build-toolchain-qemu/riscv-qemu/}" + +if [ "${USE_RVV}" = "ON" ];then + QEMU_OPTION="-cpu rv64,zba=true,zbb=true,zbc=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0 -L ${TOOLCHAIN_PATH}/sysroot" +else + QEMU_OPTION="-cpu rv64,zba=true,zbb=true,zbc=true,zbs=true -L ${TOOLCHAIN_PATH}/sysroot" +fi + +$QEMU_PREFIX_PATH/bin/qemu-riscv64 $QEMU_OPTION $@ diff --git a/files/source/compare.cc b/source/compare.cc index d4713b60..50a736bd 100644 --- a/files/source/compare.cc +++ b/source/compare.cc @@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) { } #endif - while (count >= (uint64_t)(kBlockSize)) { + while (count >= 
(uint64_t)kBlockSize) { seed = HashDjb2_SSE(src, kBlockSize, seed); src += kBlockSize; count -= kBlockSize; @@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a, (sum_a_sq + sum_b_sq + c1) * (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2); - if (ssim_d == 0.0) { + if (ssim_d == 0) { return DBL_MAX; } - return ssim_n * 1.0 / ssim_d; + return (double)ssim_n / (double)ssim_d; } } diff --git a/files/source/compare_common.cc b/source/compare_common.cc index d1cab8d2..d1cab8d2 100644 --- a/files/source/compare_common.cc +++ b/source/compare_common.cc diff --git a/files/source/compare_gcc.cc b/source/compare_gcc.cc index b834b42a..33cbe25d 100644 --- a/files/source/compare_gcc.cc +++ b/source/compare_gcc.cc @@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a, : : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10"); - return static_cast<uint32_t>(diff); + return (uint32_t)(diff); } #else uint32_t HammingDistance_SSE42(const uint8_t* src_a, diff --git a/files/source/compare_msa.cc b/source/compare_msa.cc index 0b807d37..0b807d37 100644 --- a/files/source/compare_msa.cc +++ b/source/compare_msa.cc diff --git a/files/source/compare_neon.cc b/source/compare_neon.cc index afdd6012..afdd6012 100644 --- a/files/source/compare_neon.cc +++ b/source/compare_neon.cc diff --git a/files/source/compare_neon64.cc b/source/compare_neon64.cc index 70fb9b91..70fb9b91 100644 --- a/files/source/compare_neon64.cc +++ b/source/compare_neon64.cc diff --git a/files/source/compare_win.cc b/source/compare_win.cc index 9bb27f1d..9bb27f1d 100644 --- a/files/source/compare_win.cc +++ b/source/compare_win.cc diff --git a/files/source/convert.cc b/source/convert.cc index 7178580f..6ac5bc43 100644 --- a/files/source/convert.cc +++ b/source/convert.cc @@ -24,6 +24,10 @@ namespace libyuv { extern "C" { #endif +// Subsample amount uses a shift. 
+// v is value +// a is amount to add to round up +// s is shift to subsample down #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) static __inline int Abs(int v) { return v >= 0 ? v : -v; @@ -50,18 +54,25 @@ static int I4xxToI420(const uint8_t* src_y, const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; if (src_uv_width <= 0 || src_uv_height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, - dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } // Copy I420 with optional flipping. @@ -199,6 +210,99 @@ static int Planar16bitTo8bit(const uint16_t* src_y, return 0; } +static int I41xToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int depth) { + const int scale = 1 << (24 - depth); + + if (width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + { + const int uv_width = SUBSAMPLE(width, 1, 1); + const int uv_height = SUBSAMPLE(height, 1, 1); + + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, + height); + ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u, + dst_stride_u, src_u, dst_u, scale, kFilterBilinear); + ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v, + dst_stride_v, src_v, dst_v, scale, kFilterBilinear); + } + return 0; +} + +static int I21xToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int depth) { + const int scale = 1 << (24 - depth); + + if (width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + { + const int uv_width = SUBSAMPLE(width, 1, 1); + const int uv_height = SUBSAMPLE(height, 1, 1); + const int dy = FixedDiv(height, uv_height); + + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, + height); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u, + dst_stride_u, src_u, dst_u, 0, 32768, dy, + /*bpp=*/1, scale, kFilterBilinear); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v, + dst_stride_v, src_v, dst_v, 0, 32768, dy, + /*bpp=*/1, scale, kFilterBilinear); + } + return 0; +} + // Convert 10 bit YUV to 8 bit. LIBYUV_API int I010ToI420(const uint16_t* src_y, @@ -236,38 +340,9 @@ int I210ToI420(const uint16_t* src_y, int dst_stride_v, int width, int height) { - const int depth = 10; - const int scale = 1 << (24 - depth); - - if (width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (height - 1) * src_stride_u; - src_v = src_v + (height - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - { - const int uv_width = SUBSAMPLE(width, 1, 1); - const int uv_height = SUBSAMPLE(height, 1, 1); - const int dy = FixedDiv(height, uv_height); - - Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, - height); - ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u, - dst_stride_u, src_u, dst_u, 0, 32768, dy, - /*bpp=*/1, scale, kFilterBilinear); - ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v, - dst_stride_v, src_v, dst_v, 0, 32768, dy, - /*bpp=*/1, scale, kFilterBilinear); - } - return 0; + return I21xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, 10); } LIBYUV_API @@ -292,6 +367,26 @@ int I210ToI422(const uint16_t* src_y, } LIBYUV_API +int I410ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return I41xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, 10); +} + +LIBYUV_API int I410ToI444(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, @@ -355,6 +450,26 @@ int I212ToI422(const uint16_t* src_y, } LIBYUV_API +int I212ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, 
+ int height) { + return I21xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, 12); +} + +LIBYUV_API int I412ToI444(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, @@ -375,6 +490,26 @@ int I412ToI444(const uint16_t* src_y, 0, 12); } +LIBYUV_API +int I412ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return I41xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, 12); +} + // Any Ix10 To I010 format with mirroring. static int Ix10ToI010(const uint16_t* src_y, int src_stride_y, @@ -398,18 +533,25 @@ static int Ix10ToI010(const uint16_t* src_y, const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = 
ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } LIBYUV_API @@ -649,6 +791,8 @@ int I422ToNV21(const uint8_t* src_y, // Allocate u and v buffers align_buffer_64(plane_u, halfwidth * halfheight * 2); uint8_t* plane_v = plane_u + halfwidth * halfheight; + if (!plane_u) + return 1; I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, @@ -713,6 +857,112 @@ int MM21ToI420(const uint8_t* src_y, return 0; } +LIBYUV_API +int MM21ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height) { + if (!src_y || !src_uv || !dst_yuy2 || width <= 0) { + return -1; + } + + DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2, + dst_stride_yuy2, width, height, 32); + + return 0; +} + +// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format +// documentation. +// TODO(greenjustin): Add an MT2T to I420 conversion. 
+LIBYUV_API +int MT2TToP010(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + if (width <= 0 || !height || !src_uv || !dst_uv) { + return -1; + } + + { + int uv_width = (width + 1) & ~1; + int uv_height = (height + 1) / 2; + int y = 0; + const int tile_width = 16; + const int y_tile_height = 32; + const int uv_tile_height = 16; + int padded_width = (width + tile_width - 1) & ~(tile_width - 1); + int y_tile_row_size = padded_width * y_tile_height * 10 / 8; + int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8; + size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t); + void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) = + UnpackMT2T_C; + align_buffer_64(row_buf, row_buf_size); + if (!row_buf) + return 1; + +#if defined(HAS_UNPACKMT2T_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + UnpackMT2T = UnpackMT2T_NEON; + } +#endif + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + uv_height = (height + 1) / 2; + if (dst_y) { + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv; + dst_stride_uv = -dst_stride_uv; + } + + // Unpack and detile Y in rows of tiles + if (src_y && dst_y) { + for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) { + UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y, + width, y_tile_height, y_tile_height); + src_y += src_stride_y * y_tile_height; + dst_y += dst_stride_y * y_tile_height; + } + if (height & (y_tile_height - 1)) { + UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y, + width, height & (y_tile_height - 1), y_tile_height); + } + } + + // Unpack and detile UV plane + for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) { + UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv, + uv_width, uv_tile_height, uv_tile_height); + src_uv += src_stride_uv * uv_tile_height; + dst_uv += dst_stride_uv * uv_tile_height; + } + if (uv_height & (uv_tile_height - 1)) { + UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size); + DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv, + uv_width, uv_height & (uv_tile_height - 1), + uv_tile_height); + } + free_aligned_buffer_64(row_buf); + } + return 0; +} + #ifdef I422TONV21_ROW_VERSION // Unittest fails for this version. 
// 422 chroma is 1/2 width, 1x height @@ -734,7 +984,7 @@ int I422ToNV21(const uint8_t* src_y, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; int halfwidth = (width + 1) >> 1; @@ -764,11 +1014,19 @@ int I422ToNV21(const uint8_t* src_y, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow = MergeUVRow_Any_NEON; @@ -793,6 +1051,11 @@ int I422ToNV21(const uint8_t* src_y, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -833,6 +1096,11 @@ int I422ToNV21(const uint8_t* src_y, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, halfwidth, height); @@ -842,6 +1110,8 @@ int I422ToNV21(const uint8_t* src_y, int awidth = halfwidth * 2; align_buffer_64(row_vu_0, awidth * 2); uint8_t* row_vu_1 = row_vu_0 + awidth; + if (!row_vu_0) + return 1; for (y = 0; y < height - 1; y += 2) { MergeUVRow(src_v, src_u, row_vu_0, halfwidth); @@ -1080,18 +1350,22 @@ int NV12ToNV24(const uint8_t* src_y, int dst_stride_uv, int width, int height) { + int r; if 
(width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return r; } LIBYUV_API @@ -1105,20 +1379,88 @@ int NV16ToNV24(const uint8_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, - dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return r; +} + +// Any P[420]1[02] to I[420]1[02] format with mirroring. 
+static int PxxxToIxxx(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + int subsample_x, + int subsample_y, + int depth) { + const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); + const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); + if (width <= 0 || height == 0) { + return -1; + } + ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height, + depth); + SplitUVPlane_16(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, + dst_stride_v, uv_width, uv_height, depth); return 0; } LIBYUV_API +int P010ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, + width, height, 1, 1, 10); +} + +LIBYUV_API +int P012ToI012(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, + width, height, 1, 1, 12); +} + +LIBYUV_API int P010ToP410(const uint16_t* src_y, int src_stride_y, const uint16_t* src_uv, @@ -1129,18 +1471,22 @@ int P010ToP410(const uint16_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_16(src_y, src_stride_y, width, 
height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return r; } LIBYUV_API @@ -1154,17 +1500,21 @@ int P210ToP410(const uint16_t* src_y, int dst_stride_uv, int width, int height) { + int r; if (width <= 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, - dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return r; } // Convert YUY2 to I420. 
@@ -1231,6 +1581,16 @@ int YUY2ToI420(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + YUY2ToYRow = YUY2ToYRow_Any_LSX; + YUY2ToUVRow = YUY2ToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + YUY2ToYRow = YUY2ToYRow_LSX; + YUY2ToUVRow = YUY2ToUVRow_LSX; + } + } +#endif #if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { YUY2ToYRow = YUY2ToYRow_Any_LASX; @@ -1322,6 +1682,26 @@ int UYVYToI420(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + UYVYToYRow = UYVYToYRow_Any_LSX; + UYVYToUVRow = UYVYToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_LSX; + UYVYToUVRow = UYVYToUVRow_LSX; + } + } +#endif +#if defined(HAS_UYVYTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + UYVYToYRow = UYVYToYRow_Any_LSX; + UYVYToUVRow = UYVYToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_LSX; + UYVYToUVRow = UYVYToUVRow_LSX; + } + } +#endif #if defined(HAS_UYVYTOYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { UYVYToYRow = UYVYToYRow_Any_LASX; @@ -1574,6 +1954,176 @@ int ARGBToI420(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif + + for (y = 0; y 
< height - 1; y += 2) { + ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + } + return 0; +} + +#ifdef USE_EXTRACTALPHA +// Convert ARGB to I420 with Alpha +// The following version calls ARGBExtractAlpha on the full image. +LIBYUV_API +int ARGBToI420Alpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { + int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height); + if (r == 0) { + r = ARGBExtractAlpha(src_argb, src_stride_argb, dst_a, dst_stride_a, width, + height); + } + return r; +} +#else // USE_EXTRACTALPHA +// Convert ARGB to I420 with Alpha +LIBYUV_API +int ARGBToI420Alpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { + int y; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYRow_C; + void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, + int width) = ARGBExtractAlphaRow_C; + if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } +#if defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -1584,22 +2134,63 @@ int ARGBToI420(const uint8_t* src_argb, } } #endif +#if 
defined(HAS_ARGBEXTRACTALPHAROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2 + : ARGBExtractAlphaRow_Any_SSE2; + } +#endif +#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2 + : ARGBExtractAlphaRow_Any_AVX2; + } +#endif +#if defined(HAS_ARGBEXTRACTALPHAROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON + : ARGBExtractAlphaRow_Any_NEON; + } +#endif +#if defined(HAS_ARGBEXTRACTALPHAROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA + : ARGBExtractAlphaRow_Any_MSA; + } +#endif +#if defined(HAS_ARGBEXTRACTALPHAROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX + : ARGBExtractAlphaRow_Any_LSX; + } +#endif +#if defined(HAS_ARGBEXTRACTALPHAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + ARGBExtractAlphaRow(src_argb, dst_a, width); + ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a, + width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; + dst_a += dst_stride_a * 2; } if (height & 1) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); + ARGBExtractAlphaRow(src_argb, dst_a, width); } return 0; } +#endif // USE_EXTRACTALPHA // Convert BGRA to I420. 
LIBYUV_API @@ -1628,16 +2219,6 @@ int BGRAToI420(const uint8_t* src_bgra, src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } -#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - BGRAToYRow = BGRAToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_SSSE3; - BGRAToYRow = BGRAToYRow_SSSE3; - } - } -#endif #if defined(HAS_BGRATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { BGRAToYRow = BGRAToYRow_Any_NEON; @@ -1654,12 +2235,46 @@ int BGRAToI420(const uint8_t* src_bgra, } } #endif +#if defined(HAS_BGRATOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + BGRAToYRow = BGRAToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_SSSE3; + } + } +#endif +#if defined(HAS_BGRATOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_BGRATOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + BGRAToYRow = BGRAToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + BGRAToYRow = BGRAToYRow_AVX2; + } + } +#endif +#if defined(HAS_BGRATOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + BGRAToUVRow = BGRAToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + BGRAToUVRow = BGRAToUVRow_AVX2; + } + } +#endif #if defined(HAS_BGRATOYROW_MSA) && defined(HAS_BGRATOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToYRow = BGRAToYRow_Any_MSA; BGRAToUVRow = BGRAToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { BGRAToYRow = BGRAToYRow_MSA; + } + if (IS_ALIGNED(width, 32)) { BGRAToUVRow = BGRAToUVRow_MSA; } } @@ -1674,6 +2289,19 @@ int BGRAToI420(const uint8_t* src_bgra, } } #endif +#if defined(HAS_BGRATOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + BGRAToYRow = BGRAToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + BGRAToYRow = BGRAToYRow_LASX; + } + } +#endif +#if defined(HAS_BGRATOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { 
+ BGRAToYRow = BGRAToYRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); @@ -1786,6 +2414,19 @@ int ABGRToI420(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); @@ -1882,6 +2523,19 @@ int RGBAToI420(const uint8_t* src_rgba, } } #endif +#if defined(HAS_RGBATOYROW_LASX) + if (TestCpuFlag(kCpuHasNEON)) { + RGBAToYRow = RGBAToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGBAToYRow = RGBAToYRow_LASX; + } + } +#endif +#if defined(HAS_RGBATOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToYRow = RGBAToYRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); @@ -1901,7 +2555,7 @@ int RGBAToI420(const uint8_t* src_rgba, // Enabled if 1 pass is available #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_LSX)) + defined(HAS_RGB24TOYROW_LSX) || defined(HAS_RGB24TOYROW_RVV)) #define HAS_RGB24TOYROW #endif @@ -1986,6 +2640,11 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif +#if defined(HAS_RGB24TOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToYRow = RGB24ToYRow_RVV; + } +#endif // Other platforms do intermediate conversion from RGB24 to ARGB. #else // HAS_RGB24TOYROW @@ -2035,8 +2694,10 @@ int RGB24ToI420(const uint8_t* src_rgb24, { #if !defined(HAS_RGB24TOYROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2046,10 +2707,10 @@ int RGB24ToI420(const uint8_t* src_rgb24, RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; @@ -2075,7 +2736,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, #undef HAS_RGB24TOYROW // Enabled if 1 pass is available -#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) +#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ + defined(HAS_RGB24TOYJROW_RVV) #define HAS_RGB24TOYJROW #endif @@ -2140,6 +2802,27 @@ int RGB24ToJ420(const uint8_t* src_rgb24, } } #endif +#if defined(HAS_RGB24TOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToYJRow = RGB24ToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToYJRow = RGB24ToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToYJRow = RGB24ToYJRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToYJRow = RGB24ToYJRow_RVV; + } +#endif // Other platforms do intermediate conversion from RGB24 to ARGB. 
#else // HAS_RGB24TOYJROW @@ -2189,8 +2872,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24, { #if !defined(HAS_RGB24TOYJROW) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2200,10 +2885,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24, RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; @@ -2230,7 +2915,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, // Enabled if 1 pass is available #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_LSX)) + defined(HAS_RAWTOYROW_LSX) || defined(HAS_RAWTOYROW_RVV)) #define HAS_RAWTOYROW #endif @@ -2314,6 +2999,11 @@ int RAWToI420(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToYRow = RAWToYRow_RVV; + } +#endif // Other platforms do intermediate conversion from RAW to ARGB. #else // HAS_RAWTOYROW @@ -2363,8 +3053,10 @@ int RAWToI420(const uint8_t* src_raw, { #if !defined(HAS_RAWTOYROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2374,10 +3066,10 @@ int RAWToI420(const uint8_t* src_raw, RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2403,7 +3095,8 @@ int RAWToI420(const uint8_t* src_raw, #undef HAS_RAWTOYROW // Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ + defined(HAS_RAWTOYJROW_RVV) #define HAS_RAWTOYJROW #endif @@ -2468,6 +3161,27 @@ int RAWToJ420(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToYJRow = RAWToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToYJRow = RAWToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYJRow = RAWToYJRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToYJRow = RAWToYJRow_RVV; + } +#endif // Other platforms do intermediate conversion from RAW to ARGB. #else // HAS_RAWTOYJROW @@ -2517,8 +3231,10 @@ int RAWToJ420(const uint8_t* src_raw, { #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2528,10 +3244,10 @@ int RAWToJ420(const uint8_t* src_raw, RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2695,8 +3411,10 @@ int RGB565ToI420(const uint8_t* src_rgb565, #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ @@ -2706,10 +3424,10 @@ int RGB565ToI420(const uint8_t* src_rgb565, RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); #else RGB565ToARGBRow(src_rgb565, row, width); - RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_rgb565 += src_stride_rgb565 * 2; dst_y += dst_stride_y * 2; @@ -2875,8 +3593,10 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -2888,11 +3608,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, width); #else ARGB1555ToARGBRow(src_argb1555, row, width); - ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, + ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_argb1555 += src_stride_argb1555 * 2; dst_y += dst_stride_y * 2; @@ -3055,6 +3775,24 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -3070,8 +3808,10 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, { #if !(defined(HAS_ARGB4444TOYROW_NEON)) // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; #endif for (y = 0; y < height - 1; y += 2) { @@ -3082,11 +3822,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, width); #else ARGB4444ToARGBRow(src_argb4444, row, width); - ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, + ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); #endif src_argb4444 += src_stride_argb4444 * 2; dst_y += dst_stride_y * 2; @@ -3167,6 +3907,27 @@ int RGB24ToJ400(const uint8_t* src_rgb24, } } #endif +#if defined(HAS_RGB24TOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToYJRow = RGB24ToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToYJRow = RGB24ToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToYJRow = RGB24ToYJRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToYJRow = RGB24ToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { RGB24ToYJRow(src_rgb24, dst_yj, width); @@ -3235,6 +3996,27 @@ int RAWToJ400(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToYJRow = RAWToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToYJRow = RAWToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYJRow = RAWToYJRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + 
RAWToYJRow = RAWToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToYJRow(src_raw, dst_yj, width); diff --git a/files/source/convert_argb.cc b/source/convert_argb.cc index 71ef8c10..871fea59 100644 --- a/files/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -7,8 +7,12 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ + #include "libyuv/convert_argb.h" +#include <assert.h> + +#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" @@ -65,6 +69,7 @@ int I420ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -115,6 +120,14 @@ int I420ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGBRow = I422ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToARGBRow = I422ToARGBRow_LSX; + } + } +#endif #if defined(HAS_I422TOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToARGBRow = I422ToARGBRow_Any_LASX; @@ -123,6 +136,11 @@ int I420ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); @@ -298,6 +316,7 @@ int I422ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -355,6 +374,14 @@ int I422ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGBRow = 
I422ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToARGBRow = I422ToARGBRow_LSX; + } + } +#endif #if defined(HAS_I422TOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToARGBRow = I422ToARGBRow_Any_LASX; @@ -363,6 +390,11 @@ int I422ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); @@ -536,6 +568,7 @@ int I444ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -592,6 +625,11 @@ int I444ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I444TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444ToARGBRow = I444ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); @@ -747,6 +785,133 @@ int U444ToABGR(const uint8_t* src_y, width, height); } +// Convert I444 to RGB24 with matrix. +LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } + // Coalesce rows. + if (src_stride_y == width && src_stride_u == width && src_stride_v == width && + dst_stride_rgb24 == width * 3) { + width *= height; + height = 1; + src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I444ToRGB24Row = I444ToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_LSX; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444ToRGB24Row = I444ToRGB24Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + I444ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I444 to RGB24. 
+LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I444 to RAW. +LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert 10 bit YUV to ARGB with matrix. // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. 
@@ -767,6 +932,7 @@ int I010ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -926,6 +1092,7 @@ int I012ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -983,6 +1150,7 @@ int I210ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1137,6 +1305,7 @@ int I410ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1190,6 +1359,7 @@ int I010ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1353,6 +1523,7 @@ int I012ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1408,6 +1579,7 @@ int I210ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 
0 || height == 0) { return -1; } @@ -1568,6 +1740,7 @@ int I410ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1617,6 +1790,7 @@ int P010ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1667,6 +1841,7 @@ int P210ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1715,6 +1890,7 @@ int P010ToAR30Matrix(const uint16_t* src_y, void (*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1765,6 +1941,7 @@ int P210ToAR30Matrix(const uint16_t* src_y, void (*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1823,6 +2000,7 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1865,6 +2043,14 @@ int 
I420AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422ALPHATOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_LSX; + } + } +#endif #if defined(HAS_I422ALPHATOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX; @@ -1873,6 +2059,11 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422ALPHATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_RVV; + } +#endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; @@ -1905,6 +2096,11 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -1947,6 +2143,7 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1989,6 +2186,14 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422ALPHATOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_LSX; + } + } +#endif #if defined(HAS_I422ALPHATOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX; @@ -1997,6 +2202,11 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422ALPHATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_RVV; + 
} +#endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; @@ -2029,6 +2239,11 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -2069,6 +2284,7 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2111,6 +2327,11 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I444ALPHATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_RVV; + } +#endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; @@ -2143,6 +2364,11 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I444AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -2312,6 +2538,7 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2370,6 +2597,11 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { 
I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -2412,6 +2644,7 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2470,6 +2703,11 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -2510,6 +2748,7 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2568,6 +2807,11 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I410AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -2597,6 +2841,7 @@ int I400ToARGBMatrix(const uint8_t* src_y, void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I400ToARGBRow_C; + assert(yuvconstants); if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -2652,6 +2897,11 @@ int I400ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I400TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I400ToARGBRow = I400ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { I400ToARGBRow(src_y, dst_argb, yuvconstants, width); @@ -2739,6 +2989,12 @@ int J400ToARGB(const uint8_t* src_y, } } #endif +#if 
defined(HAS_J400TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + J400ToARGBRow = J400ToARGBRow_RVV; + } +#endif + for (y = 0; y < height; ++y) { J400ToARGBRow(src_y, dst_argb, width); src_y += src_stride_y; @@ -2747,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y, return 0; } +#ifndef __riscv // Shuffle table for converting BGRA to ARGB. static const uvec8 kShuffleMaskBGRAToARGB = { 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; @@ -2834,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64, return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64, (const uint8_t*)&kShuffleMaskAR64ToAB64, width, height); } +#else +// Convert BGRA to ARGB (same as ARGBToBGRA). +LIBYUV_API +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width, + height); +} + +// Convert ARGB to BGRA. +LIBYUV_API +int ARGBToBGRA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height) { + int y; + void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) = + ARGBToBGRARow_C; + if (!src_argb || !dst_bgra || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_bgra = 0; + } + +#if defined(HAS_ARGBTOBGRAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToBGRARow = ARGBToBGRARow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToBGRARow(src_argb, dst_bgra, width); + src_argb += src_stride_argb; + dst_bgra += dst_stride_bgra; + } + return 0; +} + +// Convert ARGB to ABGR. 
+LIBYUV_API +int ARGBToABGR(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + int y; + void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) = + ARGBToABGRRow_C; + if (!src_argb || !dst_abgr || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_abgr = 0; + } + +#if defined(HAS_ARGBTOABGRROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToABGRRow = ARGBToABGRRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToABGRRow(src_argb, dst_abgr, width); + src_argb += src_stride_argb; + dst_abgr += dst_stride_abgr; + } + return 0; +} + +// Convert ABGR to ARGB (same as ARGBToABGR). +LIBYUV_API +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width, + height); +} + +// Convert RGBA to ARGB. +LIBYUV_API +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + int y; + void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) = + RGBAToARGBRow_C; + if (!src_rgba || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; + } + // Coalesce rows. 
+ if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_rgba = dst_stride_argb = 0; + } + +#if defined(HAS_RGBATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToARGBRow = RGBAToARGBRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + RGBAToARGBRow(src_rgba, dst_argb, width); + src_rgba += src_stride_rgba; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert AR64 To AB64. +LIBYUV_API +int AR64ToAB64(const uint16_t* src_ar64, + int src_stride_ar64, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height) { + int y; + void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64, + int width) = AR64ToAB64Row_C; + if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; + src_stride_ar64 = -src_stride_ar64; + } + // Coalesce rows. + if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) { + width *= height; + height = 1; + src_stride_ar64 = dst_stride_ab64 = 0; + } + +#if defined(HAS_AR64TOAB64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + AR64ToAB64Row = AR64ToAB64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + AR64ToAB64Row(src_ar64, dst_ab64, width); + src_ar64 += src_stride_ar64; + dst_ab64 += dst_stride_ab64; + } + return 0; +} +#endif // Convert RGB24 to ARGB. 
LIBYUV_API @@ -2901,6 +3347,11 @@ int RGB24ToARGB(const uint8_t* src_rgb24, } } #endif +#if defined(HAS_RGB24TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToARGBRow = RGB24ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { RGB24ToARGBRow(src_rgb24, dst_argb, width); @@ -2976,6 +3427,11 @@ int RAWToARGB(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToARGBRow = RAWToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToARGBRow(src_raw, dst_argb, width); @@ -3027,6 +3483,11 @@ int RAWToRGBA(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToRGBARow = RAWToRGBARow_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToRGBARow(src_raw, dst_rgba, width); @@ -3431,6 +3892,11 @@ int AR64ToARGB(const uint16_t* src_ar64, } } #endif +#if defined(HAS_AR64TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + AR64ToARGBRow = AR64ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { AR64ToARGBRow(src_ar64, dst_argb, width); @@ -3490,6 +3956,11 @@ int AB64ToARGB(const uint16_t* src_ab64, } } #endif +#if defined(HAS_AB64TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + AB64ToARGBRow = AB64ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { AB64ToARGBRow(src_ab64, dst_argb, width); @@ -3514,6 +3985,7 @@ int NV12ToARGBMatrix(const uint8_t* src_y, void (*NV12ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3571,6 +4043,11 @@ int NV12ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV12TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV12ToARGBRow = NV12ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); @@ -3598,6 +4075,7 @@ int NV21ToARGBMatrix(const 
uint8_t* src_y, void (*NV21ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3655,6 +4133,11 @@ int NV21ToARGBMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV21TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV21ToARGBRow = NV21ToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width); @@ -3741,6 +4224,7 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, void (*NV12ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -3774,6 +4258,11 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV12TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV12ToRGB24Row = NV12ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width); @@ -3801,6 +4290,7 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, void (*NV21ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -3834,6 +4324,11 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_NV21TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + NV21ToRGB24Row = NV21ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width); @@ -4143,6 +4638,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y, const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + 
assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -4174,6 +4670,8 @@ int Android420ToARGBMatrix(const uint8_t* src_y, // General case fallback creates NV12 align_buffer_64(plane_uv, halfwidth * 2 * halfheight); + if (!plane_uv) + return 1; dst_uv = plane_uv; for (y = 0; y < halfheight; ++y) { WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth); @@ -4243,6 +4741,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4284,6 +4783,14 @@ int I422ToRGBAMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGBAROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToRGBARow = I422ToRGBARow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToRGBARow = I422ToRGBARow_LSX; + } + } +#endif #if defined(HAS_I422TORGBAROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToRGBARow = I422ToRGBARow_Any_LASX; @@ -4292,6 +4799,11 @@ int I422ToRGBAMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGBARow = I422ToRGBARow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); @@ -4354,6 +4866,7 @@ int NV12ToRGB565Matrix(const uint8_t* src_y, void (*NV12ToRGB565Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -4456,6 +4969,7 @@ int I420ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { 
return -1; } @@ -4497,6 +5011,14 @@ int I420ToRGBAMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGBAROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToRGBARow = I422ToRGBARow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToRGBARow = I422ToRGBARow_LSX; + } + } +#endif #if defined(HAS_I422TORGBAROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToRGBARow = I422ToRGBARow_Any_LASX; @@ -4505,6 +5027,11 @@ int I420ToRGBAMatrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGBARow = I422ToRGBARow_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); @@ -4572,6 +5099,7 @@ int I420ToRGB24Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4613,6 +5141,14 @@ int I420ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_LSX; + } + } +#endif #if defined(HAS_I422TORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToRGB24Row = I422ToRGB24Row_Any_LASX; @@ -4621,6 +5157,11 @@ int I420ToRGB24Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGB24Row = I422ToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); @@ -4742,6 +5283,134 @@ int H420ToRAW(const uint8_t* src_y, width, height); } +// Convert I422 to RGB24 with matrix. 
+LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I422TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToRGB24Row = I422ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToRGB24Row = I422ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB24Row = I422ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_LSX; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_LASX; + } + } +#endif 
+#if defined(HAS_I422TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToRGB24Row = I422ToRGB24Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert I420 to ARGB1555. 
LIBYUV_API int I420ToARGB1555(const uint8_t* src_y, @@ -4801,6 +5470,14 @@ int I420ToARGB1555(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGB1555ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGB1555Row = I422ToARGB1555Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToARGB1555Row = I422ToARGB1555Row_LSX; + } + } +#endif #if defined(HAS_I422TOARGB1555ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_LASX; @@ -4882,6 +5559,14 @@ int I420ToARGB4444(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGB4444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGB4444Row = I422ToARGB4444Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToARGB4444Row = I422ToARGB4444Row_LSX; + } + } +#endif #if defined(HAS_I422TOARGB4444ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_LASX; @@ -4922,6 +5607,7 @@ int I420ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -4963,6 +5649,14 @@ int I420ToRGB565Matrix(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB565ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToRGB565Row = I422ToRGB565Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToRGB565Row = I422ToRGB565Row_LSX; + } + } +#endif #if defined(HAS_I422TORGB565ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToRGB565Row = I422ToRGB565Row_Any_LASX; @@ -5035,23 +5729,25 @@ int H420ToRGB565(const uint8_t* src_y, &kYuvH709Constants, width, height); } -// Convert I422 to RGB565. +// Convert I422 to RGB565 with specified color matrix. 
LIBYUV_API -int I422ToRGB565(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_rgb565, - int dst_stride_rgb565, - int width, - int height) { +int I422ToRGB565Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const struct YuvConstants* yuvconstants, + int width, + int height) { int y; void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5093,6 +5789,14 @@ int I422ToRGB565(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TORGB565ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToRGB565Row = I422ToRGB565Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToRGB565Row = I422ToRGB565Row_LSX; + } + } +#endif #if defined(HAS_I422TORGB565ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToRGB565Row = I422ToRGB565Row_Any_LASX; @@ -5103,7 +5807,7 @@ int I422ToRGB565(const uint8_t* src_y, #endif for (y = 0; y < height; ++y) { - I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); + I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, yuvconstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; src_u += src_stride_u; @@ -5112,6 +5816,23 @@ int I422ToRGB565(const uint8_t* src_y, return 0; } +// Convert I422 to RGB565. 
+LIBYUV_API +int I422ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height) { + return I422ToRGB565Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb565, dst_stride_rgb565, + &kYuvI601Constants, width, height); +} + // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, @@ -5136,7 +5857,7 @@ int I420ToRGB565Dither(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, int width) = + uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -5191,6 +5912,14 @@ int I420ToRGB565Dither(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGBRow = I422ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToARGBRow = I422ToARGBRow_LSX; + } + } +#endif #if defined(HAS_I422TOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToARGBRow = I422ToARGBRow_Any_LASX; @@ -5199,6 +5928,11 @@ int I420ToRGB565Dither(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; @@ -5231,6 +5965,14 @@ int I420ToRGB565Dither(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX; + } + } +#endif #if 
defined(HAS_ARGBTORGB565DITHERROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX; @@ -5242,6 +5984,8 @@ int I420ToRGB565Dither(const uint8_t* src_y, { // Allocate a row of argb. align_buffer_64(row_argb, width * 4); + if (!row_argb) + return 1; for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, @@ -5278,6 +6022,7 @@ int I420ToAR30Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5401,9 +6146,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5453,48 +6201,65 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } } #endif +#if defined(HAS_I444TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444ToARGBRow = I444ToARGBRow_RVV; + } +#endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if 
defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif +#if defined(HAS_SCALEROWUP2_BILINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; } #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; - uint8_t* temp_u_2 = row + kRowSize; - uint8_t* temp_v_1 = row + kRowSize * 2; - uint8_t* temp_v_2 = row + kRowSize * 3; - - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; + + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, 
temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5506,8 +6271,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5531,8 +6296,9 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5582,36 +6348,48 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, } } #endif +#if defined(HAS_I444TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444ToARGBRow = I444ToARGBRow_RVV; + } +#endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + 
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; } #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; - uint8_t* temp_v = row + kRowSize; + uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5623,6 +6401,156 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, return 0; } +static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444ToRGB24Row = I444ToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_LASX; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444ToRGB24Row = I444ToRGB24Row_RVV; + } +#endif + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; + } +#endif + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif + +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; + } +#endif + +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp_Bilinear = 
ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif +#if defined(HAS_SCALEROWUP2_BILINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif + + // alloc 4 lines temp + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); + uint8_t* temp_u_1 = row; + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; + + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + I444ToRGB24Row(src_y, temp_u_2, temp_v_2, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); + } + + free_aligned_buffer_64(row); + return 0; +} + static int I010ToAR30MatrixBilinear(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, @@ -5639,9 +6567,12 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - 
ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5668,41 +6599,46 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; - - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + uint16_t* 
temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; + + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -5714,8 +6650,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); } @@ -5740,8 +6676,9 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5770,29 +6707,31 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = 
ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -5819,9 +6758,12 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5848,41 +6790,46 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, } #endif -#if 
defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; - - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; + + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, 
kRowSize, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5894,8 +6841,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5919,8 +6866,9 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5949,29 +6897,31 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const 
int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6006,9 +6956,12 @@ static int I420AlphaToARGBMatrixBilinear( int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6059,6 +7012,11 @@ static int I420AlphaToARGBMatrixBilinear( } } #endif +#if defined(HAS_I444ALPHATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_RVV; + } +#endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; @@ -6091,40 +7049,58 @@ static int I420AlphaToARGBMatrixBilinear( } } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp 
= ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif +#if defined(HAS_SCALEROWUP2_BILINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; } #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; - uint8_t* temp_u_2 = row + kRowSize; - uint8_t* temp_v_1 = row + kRowSize * 2; - uint8_t* temp_v_2 = row + kRowSize * 3; - - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + uint8_t* temp_u_2 = row + row_size; + uint8_t* temp_v_1 = row + row_size * 2; + uint8_t* temp_v_2 = row + row_size * 3; + if (!row) + return 1; + + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6135,8 +7111,8 @@ static int 
I420AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6158,8 +7134,8 @@ static int I420AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6193,8 +7169,9 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6245,6 +7222,11 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, } } #endif +#if defined(HAS_I444ALPHATOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_RVV; + } +#endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; @@ -6277,36 +7259,49 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif + #if 
defined(HAS_SCALEROWUP2_LINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; } #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); uint8_t* temp_u = row; - uint8_t* temp_v = row + kRowSize; + uint8_t* temp_v = row + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6346,9 +7341,12 @@ static int I010AlphaToARGBMatrixBilinear( int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void 
(*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6407,35 +7405,45 @@ static int I010AlphaToARGBMatrixBilinear( } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif // alloc 4 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 4 * sizeof(uint16_t)); uint16_t* temp_u_1 = (uint16_t*)(row); - uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; - uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; - uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; - - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + uint16_t* temp_u_2 = (uint16_t*)(row) + row_size; + uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; + 
uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; + if (!row) + return 1; + + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6446,8 +7454,8 @@ static int I010AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6469,8 +7477,8 @@ static int I010AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); - Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6504,8 +7512,9 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6564,32 +7573,39 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif #if 
defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_NEON; } #endif // alloc 2 lines temp - const int kRowSize = (width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_u = (uint16_t*)(row); - uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v = (uint16_t*)(row) + row_size; + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6618,9 +7634,10 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, void (*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6649,35 +7666,37 @@ static int P010ToARGBMatrixBilinear(const uint16_t* 
src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif // alloc 2 lines temp - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); - uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; + if (!row) + return 1; - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6688,7 +7707,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); } @@ -6709,8 +7728,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, void (*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, 
const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6739,28 +7759,30 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6784,9 +7806,10 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const 
uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -6815,35 +7838,37 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif // alloc 2 lines temp - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * 2 * sizeof(uint16_t)); uint16_t* temp_uv_1 = (uint16_t*)(row); - uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; + if (!row) + return 1; - Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6854,7 +7879,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, 
kRowSize, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); } @@ -6875,8 +7900,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -6905,28 +7931,30 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif - const int kRowSize = (2 * width + 31) & ~31; - align_buffer_64(row, kRowSize * sizeof(uint16_t)); + const int row_size = (2 * width + 31) & ~31; + align_buffer_64(row, row_size * sizeof(uint16_t)); uint16_t* temp_uv = (uint16_t*)(row); + if (!row) + return 1; for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6937,6 +7965,140 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, return 0; } +static int I422ToRGB24MatrixLinear(const 
uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I444ToRGB24Row = I444ToRGB24Row_RVV; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp2_Linear = 
ScaleRowUp2_Linear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV; + } +#endif + + // alloc 2 lines temp + const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); + uint8_t* temp_u = row; + uint8_t* temp_v = row + row_size; + if (!row) + return 1; + + for (y = 0; y < height; ++y) { + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); + I444ToRGB24Row(src_y, temp_u, temp_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I422ToRGB24MatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + } + + return -1; +} + LIBYUV_API int I420ToARGBMatrixFilter(const uint8_t* src_y, int src_stride_y, @@ -6998,6 +8160,35 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, } LIBYUV_API +int I420ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const 
struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 + case kFilterBilinear: + case kFilterBox: + return I420ToRGB24MatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + } + + return -1; +} + +LIBYUV_API int I010ToAR30MatrixFilter(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, @@ -7015,13 +8206,12 @@ int I010ToAR30MatrixFilter(const uint16_t* src_y, return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToAR30MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7074,13 +8264,12 @@ int I010ToARGBMatrixFilter(const uint16_t* src_y, return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7137,14 +8326,13 @@ int I420AlphaToARGBMatrixFilter(const uint8_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear 
using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7206,14 +8394,13 @@ int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7253,6 +8440,8 @@ int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, return -1; } +// TODO(fb): Verify this function works correctly. P010 is like NV12 but 10 bit +// UV is biplanar. 
LIBYUV_API int P010ToARGBMatrixFilter(const uint16_t* src_y, int src_stride_y, @@ -7269,13 +8458,12 @@ int P010ToARGBMatrixFilter(const uint16_t* src_y, return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7324,13 +8512,12 @@ int P010ToAR30MatrixFilter(const uint16_t* src_y, return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; diff --git a/files/source/convert_from.cc b/source/convert_from.cc index 8bd07e4c..e69da9e9 100644 --- a/files/source/convert_from.cc +++ b/source/convert_from.cc @@ -52,19 +52,26 @@ static int I420ToI4xx(const uint8_t* src_y, const int dst_y_height = Abs(src_y_height); const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1); const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1); + int r; if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 || dst_uv_height <= 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, - dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, - dst_stride_u, dst_uv_width, dst_uv_height, 
kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, - dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return r; } // Convert 8 bit YUV to 10 bit. @@ -223,21 +230,28 @@ int I010ToI410(const uint16_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); - } - ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width), - Abs(height), kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), - SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width), - Abs(height), kFilterBilinear); - return 0; + r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + } + r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width), + Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width), + Abs(height), kFilterBilinear); + return r; } // 422 chroma to 444 chroma, 10/12 bit version @@ -256,19 +270,26 @@ int I210ToI410(const uint16_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = 
ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, - dst_stride_u, Abs(width), Abs(height), kFilterBilinear); - ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, - dst_stride_v, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, + dst_stride_u, Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, + dst_stride_v, Abs(width), Abs(height), kFilterBilinear); + return r; } // 422 chroma is 1/2 width, 1x height @@ -288,19 +309,26 @@ int I422ToI444(const uint8_t* src_y, int dst_stride_v, int width, int height) { + int r; if (width == 0 || height == 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, - Abs(width), Abs(height), kFilterBilinear); + r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } } - ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, - dst_stride_u, Abs(width), Abs(height), kFilterBilinear); - ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, - dst_stride_v, Abs(width), Abs(height), kFilterBilinear); - return 0; + r = ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u, + dst_stride_u, Abs(width), Abs(height), kFilterBilinear); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v, + dst_stride_v, Abs(width), Abs(height), kFilterBilinear); + return r; } // Copy to I400. 
Source can be I420,422,444,400,NV12,NV21 @@ -446,6 +474,14 @@ int I420ToYUY2(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_LSX; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToYUY2Row = I422ToYUY2Row_Any_LASX; @@ -533,6 +569,14 @@ int I422ToUYVY(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOUYVYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_LSX; + } + } +#endif #if defined(HAS_I422TOUYVYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LASX; @@ -608,6 +652,14 @@ int I420ToUYVY(const uint8_t* src_y, } } #endif +#if defined(HAS_I422TOUYVYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_LSX; + } + } +#endif #if defined(HAS_I422TOUYVYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LASX; diff --git a/files/source/convert_from_argb.cc b/source/convert_from_argb.cc index e50c2af3..b45de8c8 100644 --- a/files/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUV444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUV444Row = ARGBToUV444Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_LSX; + } + } +#endif #if defined(HAS_ARGBTOUV444ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToUV444Row = ARGBToUV444Row_Any_LASX; @@ -116,6 +124,14 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if 
defined(HAS_ARGBTOYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -124,6 +140,11 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToUV444Row(src_argb, dst_u, dst_v, width); @@ -230,7 +251,24 @@ int ARGBToI422(const uint8_t* src_argb, } } #endif - +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -241,6 +279,11 @@ int ARGBToI422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); @@ -340,6 +383,14 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -350,6 +401,11 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -361,11 +417,19 @@ int ARGBToNV12(const uint8_t* 
src_argb, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -390,10 +454,17 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -502,6 +573,24 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -512,6 +601,11 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -523,11 +617,19 @@ int ARGBToNV21(const uint8_t* src_argb, #if 
defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -552,10 +654,17 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -663,6 +772,27 @@ int ABGRToNV12(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -674,11 +804,19 @@ int ABGRToNV12(const uint8_t* src_abgr, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if 
(IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -703,10 +841,17 @@ int ABGRToNV12(const uint8_t* src_abgr, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); @@ -815,6 +960,27 @@ int ABGRToNV21(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -826,11 +992,19 @@ int ABGRToNV21(const uint8_t* src_abgr, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -855,10 +1029,17 @@ int ABGRToNV21(const uint8_t* src_abgr, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ 
= MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); @@ -972,6 +1153,24 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -982,6 +1181,11 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; @@ -1014,6 +1218,14 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_LSX; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToYUY2Row = I422ToYUY2Row_Any_LASX; @@ -1028,6 +1240,8 @@ int ARGBToYUY2(const uint8_t* src_argb, align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; + if (!row_y) + return 1; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -1135,6 +1349,24 @@ int ARGBToUYVY(const uint8_t* src_argb, } } 
#endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -1145,6 +1377,11 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; @@ -1177,6 +1414,14 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_LSX; + } + } +#endif #if defined(HAS_I422TOUYVYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LASX; @@ -1191,6 +1436,8 @@ int ARGBToUYVY(const uint8_t* src_argb, align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; + if (!row_y) + return 1; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -1262,6 +1509,14 @@ int ARGBToI400(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -1270,6 +1525,11 @@ int ARGBToI400(const uint8_t* src_argb, } } 
#endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToYRow(src_argb, dst_y, width); @@ -1279,6 +1539,7 @@ int ARGBToI400(const uint8_t* src_argb, return 0; } +#ifndef __riscv // Shuffle table for converting ARGB to RGBA. static const uvec8 kShuffleMaskARGBToRGBA = { 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; @@ -1294,6 +1555,47 @@ int ARGBToRGBA(const uint8_t* src_argb, return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba, (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height); } +#else +// Convert ARGB to RGBA. +LIBYUV_API +int ARGBToRGBA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height) { + int y; + void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) = + ARGBToRGBARow_C; + if (!src_argb || !dst_rgba || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_rgba = 0; + } + +#if defined(HAS_ARGBTORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRGBARow = ARGBToRGBARow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToRGBARow(src_argb, dst_rgba, width); + src_argb += src_stride_argb; + dst_rgba += dst_stride_rgba; + } + return 0; +} +#endif // Convert ARGB To RGB24. 
LIBYUV_API @@ -1360,6 +1662,14 @@ int ARGBToRGB24(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_LSX; + } + } +#endif #if defined(HAS_ARGBTORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; @@ -1368,6 +1678,11 @@ int ARGBToRGB24(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRGB24Row = ARGBToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB24Row(src_argb, dst_rgb24, width); @@ -1434,6 +1749,14 @@ int ARGBToRAW(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRAWRow = ARGBToRAWRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToRAWRow = ARGBToRAWRow_LSX; + } + } +#endif #if defined(HAS_ARGBTORAWROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRAWRow = ARGBToRAWRow_Any_LASX; @@ -1442,6 +1765,11 @@ int ARGBToRAW(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRAWRow = ARGBToRAWRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToRAWRow(src_argb, dst_raw, width); @@ -1467,7 +1795,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, int height) { int y; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, int width) = + uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1512,6 +1840,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX; + } + } +#endif #if defined(HAS_ARGBTORGB565DITHERROW_LASX) if 
(TestCpuFlag(kCpuHasLASX)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX; @@ -1589,6 +1925,15 @@ int ARGBToRGB565(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB565ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565Row = ARGBToRGB565Row_LSX; + } + } +#endif + #if defined(HAS_ARGBTORGB565ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_LASX; @@ -1663,6 +2008,14 @@ int ARGBToARGB1555(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOARGB1555ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_LSX; + } + } +#endif #if defined(HAS_ARGBTOARGB1555ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LASX; @@ -1737,6 +2090,14 @@ int ARGBToARGB4444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOARGB4444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_LSX; + } + } +#endif #if defined(HAS_ARGBTOARGB4444ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LASX; @@ -1858,19 +2219,19 @@ int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_uj 
|| !dst_vj || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1879,6 +2240,22 @@ int ARGBToJ420(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } +#if defined(HAS_ARGBTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVJRow = ARGBToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_NEON; + } + } +#endif #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYJRow = ARGBToYJRow_Any_SSSE3; @@ -1903,19 +2280,11 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; } } #endif @@ -1951,18 +2320,23 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { - ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, src_stride_argb, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width); src_argb += src_stride_argb * 2; dst_yj += dst_stride_yj * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; 
} if (height & 1) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); } return 0; @@ -1974,19 +2348,19 @@ int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1997,10 +2371,10 @@ int ARGBToJ422(const uint8_t* src_argb, } // Coalesce rows. 
if (src_stride_argb == width * 4 && dst_stride_yj == width && - dst_stride_u * 2 == width && dst_stride_v * 2 == width) { + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; + src_stride_argb = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; } #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -2026,6 +2400,14 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; @@ -2074,270 +2456,649 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; dst_yj += dst_stride_yj; - dst_u += dst_stride_u; - dst_v += dst_stride_v; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } return 0; } -// Convert ARGB to AR64. +// Convert ARGB to J400. LIBYUV_API -int ARGBToAR64(const uint8_t* src_argb, +int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, - uint16_t* dst_ar64, - int dst_stride_ar64, + uint8_t* dst_yj, + int dst_stride_yj, int width, int height) { int y; - void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, - int width) = ARGBToAR64Row_C; - if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = + ARGBToYJRow_C; + if (!src_argb || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. 
if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { + if (src_stride_argb == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_argb = dst_stride_ar64 = 0; + src_stride_argb = dst_stride_yj = 0; } -#if defined(HAS_ARGBTOAR64ROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAR64Row = ARGBToAR64Row_SSSE3; + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOAR64ROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAR64Row = ARGBToAR64Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAR64Row = ARGBToAR64Row_AVX2; + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOAR64ROW_NEON) +#if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToAR64Row = ARGBToAR64Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToAR64Row = ARGBToAR64Row_NEON; + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; } } #endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToAR64Row(src_argb, dst_ar64, width); + ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; - dst_ar64 += dst_stride_ar64; + dst_yj += dst_stride_yj; } return 0; } -// Convert ARGB to AB64. +// Convert RGBA to J400. 
LIBYUV_API -int ARGBToAB64(const uint8_t* src_argb, - int src_stride_argb, - uint16_t* dst_ab64, - int dst_stride_ab64, +int RGBAToJ400(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_yj, + int dst_stride_yj, int width, int height) { int y; - void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, - int width) = ARGBToAB64Row_C; - if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { + void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = + RGBAToYJRow_C; + if (!src_rgba || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { + if (src_stride_rgba == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_argb = dst_stride_ab64 = 0; + src_stride_rgba = dst_stride_yj = 0; } -#if defined(HAS_ARGBTOAB64ROW_SSSE3) +#if defined(HAS_RGBATOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAB64Row = ARGBToAB64Row_SSSE3; + RGBAToYJRow = RGBAToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOAB64ROW_AVX2) +#if defined(HAS_RGBATOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAB64Row = ARGBToAB64Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAB64Row = ARGBToAB64Row_AVX2; + RGBAToYJRow = RGBAToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGBAToYJRow = RGBAToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOAB64ROW_NEON) +#if defined(HAS_RGBATOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToAB64Row = ARGBToAB64Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToAB64Row = ARGBToAB64Row_NEON; + 
RGBAToYJRow = RGBAToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_NEON; + } + } +#endif +#if defined(HAS_RGBATOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGBAToYJRow = RGBAToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_MSA; + } + } +#endif +#if defined(HAS_RGBATOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGBAToYJRow = RGBAToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_LSX; } } #endif +#if defined(HAS_RGBATOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGBAToYJRow = RGBAToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGBAToYJRow = RGBAToYJRow_LASX; + } + } +#endif +#if defined(HAS_RGBATOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToYJRow = RGBAToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToAB64Row(src_argb, dst_ab64, width); - src_argb += src_stride_argb; - dst_ab64 += dst_stride_ab64; + RGBAToYJRow(src_rgba, dst_yj, width); + src_rgba += src_stride_rgba; + dst_yj += dst_stride_yj; } return 0; } -// Convert ARGB to J400. +// Convert ABGR to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ400(const uint8_t* src_argb, - int src_stride_argb, +int ABGRToJ420(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_yj, int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = - ARGBToYJRow_C; - if (!src_argb || !dst_yj || width <= 0 || height == 0) { + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ABGRToYJRow = ABGRToYJRow_Any_MSA; + ABGRToUVJRow = ABGRToUVJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_MSA; + ABGRToUVJRow = ABGRToUVJRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + 
ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width); + src_abgr += src_stride_abgr * 2; + dst_yj += dst_stride_yj * 2; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; + } + if (height & 1) { + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + } + return 0; +} + +// Convert ABGR to J422. (JPeg full range I422). +LIBYUV_API +int ABGRToJ422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height) { + int y; + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_yj == width) { + if (src_stride_abgr == width * 4 && dst_stride_yj == width && + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_yj = 0; + src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; } -#if defined(HAS_ARGBTOYJROW_SSSE3) +#if defined(HAS_ABGRTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; + ABGRToYJRow = ABGRToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; + ABGRToYJRow = ABGRToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; + ABGRToYJRow = ABGRToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; + ABGRToYJRow = ABGRToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; + ABGRToYJRow = ABGRToYJRow_NEON; } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; + ABGRToYJRow = ABGRToYJRow_Any_MSA; + ABGRToUVJRow 
= ABGRToUVJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; + ABGRToYJRow = ABGRToYJRow_MSA; + } + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; } } #endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToYJRow(src_argb, dst_yj, width); - src_argb += src_stride_argb; + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; dst_yj += dst_stride_yj; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } return 0; } -// Convert RGBA to J400. +// Convert ABGR to J400. LIBYUV_API -int RGBAToJ400(const uint8_t* src_rgba, - int src_stride_rgba, +int ABGRToJ400(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_yj, int dst_stride_yj, int width, int height) { int y; - void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = - RGBAToYJRow_C; - if (!src_rgba || !dst_yj || width <= 0 || height == 0) { + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_rgba = src_rgba + (height - 1) * src_stride_rgba; - src_stride_rgba = -src_stride_rgba; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; } // Coalesce rows. 
- if (src_stride_rgba == width * 4 && dst_stride_yj == width) { + if (src_stride_abgr == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_rgba = dst_stride_yj = 0; + src_stride_abgr = dst_stride_yj = 0; } -#if defined(HAS_RGBATOYJROW_SSSE3) +#if defined(HAS_ABGRTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToYJRow = RGBAToYJRow_Any_SSSE3; + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_SSSE3; + ABGRToYJRow = ABGRToYJRow_SSSE3; } } #endif -#if defined(HAS_RGBATOYJROW_AVX2) +#if defined(HAS_ABGRTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - RGBAToYJRow = RGBAToYJRow_Any_AVX2; + ABGRToYJRow = ABGRToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - RGBAToYJRow = RGBAToYJRow_AVX2; + ABGRToYJRow = ABGRToYJRow_AVX2; } } #endif -#if defined(HAS_RGBATOYJROW_NEON) +#if defined(HAS_ABGRTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - RGBAToYJRow = RGBAToYJRow_Any_NEON; + ABGRToYJRow = ABGRToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_NEON; + ABGRToYJRow = ABGRToYJRow_NEON; } } #endif -#if defined(HAS_RGBATOYJROW_MSA) +#if defined(HAS_ABGRTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - RGBAToYJRow = RGBAToYJRow_Any_MSA; + ABGRToYJRow = ABGRToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_MSA; + ABGRToYJRow = ABGRToYJRow_MSA; } } #endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - RGBAToYJRow(src_rgba, dst_yj, width); - src_rgba += src_stride_rgba; + ABGRToYJRow(src_abgr, dst_yj, 
width); + src_abgr += src_stride_abgr; dst_yj += dst_stride_yj; } return 0; } +// Convert ARGB to AR64. +LIBYUV_API +int ARGBToAR64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height) { + int y; + void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, + int width) = ARGBToAR64Row_C; + if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ar64 = 0; + } +#if defined(HAS_ARGBTOAR64ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR64Row = ARGBToAR64Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR64Row = ARGBToAR64Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR64Row = ARGBToAR64Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToAR64Row = ARGBToAR64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToAR64Row = ARGBToAR64Row_NEON; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToAR64Row = ARGBToAR64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToAR64Row(src_argb, dst_ar64, width); + src_argb += src_stride_argb; + dst_ar64 += dst_stride_ar64; + } + return 0; +} + +// Convert ARGB to AB64. 
+LIBYUV_API +int ARGBToAB64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height) { + int y; + void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, + int width) = ARGBToAB64Row_C; + if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ab64 = 0; + } +#if defined(HAS_ARGBTOAB64ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAB64Row = ARGBToAB64Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAB64Row = ARGBToAB64Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAB64Row = ARGBToAB64Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToAB64Row = ARGBToAB64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToAB64Row = ARGBToAB64Row_NEON; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToAB64Row = ARGBToAB64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToAB64Row(src_argb, dst_ab64, width); + src_argb += src_stride_argb; + dst_ab64 += dst_stride_ab64; + } + return 0; +} + // Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ + defined(HAS_RAWTOYJROW_RVV) #define HAS_RAWTOYJROW #endif @@ -2355,7 +3116,7 @@ int RAWToJNV21(const uint8_t* src_raw, int halfwidth = (width + 1) >> 1; #if defined(HAS_RAWTOYJROW) void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_u, 
uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = RAWToUVJRow_C; void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYJRow_C; @@ -2363,12 +3124,12 @@ int RAWToJNV21(const uint8_t* src_raw, void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYJRow_C; #endif - void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + void (*MergeUVRow_)(const uint8_t* src_uj, const uint8_t* src_vj, uint8_t* dst_vu, int width) = MergeUVRow_C; if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; @@ -2403,6 +3164,27 @@ int RAWToJNV21(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToYJRow = RAWToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToYJRow = RAWToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYJRow = RAWToYJRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToYJRow = RAWToYJRow_RVV; + } +#endif // Other platforms do intermediate conversion from RAW to ARGB. 
#else // HAS_RAWTOYJROW @@ -2459,11 +3241,19 @@ int RAWToJNV21(const uint8_t* src_raw, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -2488,29 +3278,41 @@ int RAWToJNV21(const uint8_t* src_raw, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { +#if defined(HAS_RAWTOYJROW) // Allocate a row of uv. - align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); -#if !defined(HAS_RAWTOYJROW) - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_uv_size = ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, row_uv_size * 2); + uint8_t* row_vj = row_uj + row_uv_size; +#else + // Allocate row of uv and 2 rows of ARGB. 
+ const int row_size = ((width * 4 + 31) & ~31); + const int row_uv_size = ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2); + uint8_t* row_vj = row_uj + row_uv_size; + uint8_t* row = row_vj + row_uv_size; #endif + if (!row_uj) + return 1; for (y = 0; y < height - 1; y += 2) { #if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); RAWToYJRow(src_raw, dst_y, width); RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2518,20 +3320,17 @@ int RAWToJNV21(const uint8_t* src_raw, } if (height & 1) { #if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToUVJRow(src_raw, 0, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); RAWToYJRow(src_raw, dst_y, width); #else RAWToARGBRow(src_raw, row, width); - ARGBToUVJRow(row, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + ARGBToUVJRow(row, 0, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); #endif } -#if !defined(HAS_RAWTOYJROW) - free_aligned_buffer_64(row); -#endif - free_aligned_buffer_64(row_u); + 
free_aligned_buffer_64(row_uj); } return 0; } diff --git a/files/source/convert_jpeg.cc b/source/convert_jpeg.cc index d7556ee9..d7556ee9 100644 --- a/files/source/convert_jpeg.cc +++ b/source/convert_jpeg.cc diff --git a/files/source/convert_to_argb.cc b/source/convert_to_argb.cc index 84df16c8..84df16c8 100644 --- a/files/source/convert_to_argb.cc +++ b/source/convert_to_argb.cc diff --git a/files/source/convert_to_i420.cc b/source/convert_to_i420.cc index 5869ecd7..5869ecd7 100644 --- a/files/source/convert_to_i420.cc +++ b/source/convert_to_i420.cc diff --git a/files/source/cpu_id.cc b/source/cpu_id.cc index 56fe60e4..eedce16b 100644 --- a/files/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -40,7 +40,6 @@ extern "C" { // cpu_info_ variable for SIMD instruction sets detected. LIBYUV_API int cpu_info_ = 0; -// TODO(fbarchard): Consider using int for cpuid so casting is not needed. // Low level cpuid for X86. #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ defined(__x86_64__)) && \ @@ -108,14 +107,14 @@ void CpuId(int eax, int ecx, int* cpu_info) { // } // For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code. // https://code.google.com/p/libyuv/issues/detail?id=529 -#if defined(_M_IX86) && (_MSC_VER < 1900) +#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900) #pragma optimize("g", off) #endif #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ defined(__x86_64__)) && \ !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. -int GetXCR0() { +static int GetXCR0() { int xcr0 = 0; #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT @@ -129,7 +128,7 @@ int GetXCR0() { #define GetXCR0() 0 #endif // defined(_M_IX86) || defined(_M_X64) .. // Return optimization to previous setting. 
-#if defined(_M_IX86) && (_MSC_VER < 1900) +#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900) #pragma optimize("g", on) #endif @@ -137,13 +136,14 @@ int GetXCR0() { // For Arm, but public to allow testing on any CPU LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { char cpuinfo_line[512]; - FILE* f = fopen(cpuinfo_name, "r"); + FILE* f = fopen(cpuinfo_name, "re"); if (!f) { // Assume Neon if /proc/cpuinfo is unavailable. // This will occur for Chrome sandbox for Pepper or Render process. return kCpuHasNEON; } - while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { + memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { if (memcmp(cpuinfo_line, "Features", 8) == 0) { char* p = strstr(cpuinfo_line, " neon"); if (p && (p[5] == ' ' || p[5] == '\n')) { @@ -162,17 +162,90 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { return 0; } -// TODO(fbarchard): Consider read_msa_ir(). +LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) { + char cpuinfo_line[512]; + int flag = 0; + FILE* f = fopen(cpuinfo_name, "re"); + if (!f) { +#if defined(__riscv_vector) + // Assume RVV if /proc/cpuinfo is unavailable. + // This will occur for Chrome sandbox for Pepper or Render process. + return kCpuHasRVV; +#else + return 0; +#endif + } + memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { + if (memcmp(cpuinfo_line, "isa", 3) == 0) { + // ISA string must begin with rv64{i,e,g} for a 64-bit processor. + char* isa = strstr(cpuinfo_line, "rv64"); + if (isa) { + size_t isa_len = strlen(isa); + char* extensions; + size_t extensions_len = 0; + size_t std_isa_len; + // Remove the new-line character at the end of string + if (isa[isa_len - 1] == '\n') { + isa[--isa_len] = '\0'; + } + // 5 ISA characters + if (isa_len < 5) { + fclose(f); + return 0; + } + // Skip {i,e,g} canonical checking. 
+ // Skip rvxxx + isa += 5; + // Find the very first occurrence of 's', 'x' or 'z'. + // To detect multi-letter standard, non-standard, and + // supervisor-level extensions. + extensions = strpbrk(isa, "zxs"); + if (extensions) { + // Multi-letter extensions are seperated by a single underscore + // as described in RISC-V User-Level ISA V2.2. + char* ext = strtok(extensions, "_"); + extensions_len = strlen(extensions); + while (ext) { + // Search for the ZVFH (Vector FP16) extension. + if (!strcmp(ext, "zvfh")) { + flag |= kCpuHasRVVZVFH; + } + ext = strtok(NULL, "_"); + } + } + std_isa_len = isa_len - extensions_len - 5; + // Detect the v in the standard single-letter extensions. + if (memchr(isa, 'v', std_isa_len)) { + // The RVV implied the F extension. + flag |= kCpuHasRVV; + } + } + } +#if defined(__riscv_vector) + // Assume RVV if /proc/cpuinfo is from x86 host running QEMU. + else if ((memcmp(cpuinfo_line, "vendor_id\t: GenuineIntel", 24) == 0) || + (memcmp(cpuinfo_line, "vendor_id\t: AuthenticAMD", 24) == 0)) { + fclose(f); + return kCpuHasRVV; + } +#endif + } + fclose(f); + return flag; +} + LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) { char cpuinfo_line[512]; - int flag = 0x0; - FILE* f = fopen(cpuinfo_name, "r"); + int flag = 0; + FILE* f = fopen(cpuinfo_name, "re"); if (!f) { // Assume nothing if /proc/cpuinfo is unavailable. // This will occur for Chrome sandbox for Pepper or Render process. return 0; } - while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { + memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { if (memcmp(cpuinfo_line, "cpu model", 9) == 0) { // Workaround early kernel without MSA in ASEs line. if (strstr(cpuinfo_line, "Loongson-2K")) { @@ -191,14 +264,13 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) { return flag; } -// TODO(fbarchard): Consider read_loongarch_ir(). 
#define LOONGARCH_CFG2 0x2 #define LOONGARCH_CFG2_LSX (1 << 6) #define LOONGARCH_CFG2_LASX (1 << 7) #if defined(__loongarch__) LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) { - int flag = 0x0; + int flag = 0; uint32_t cfg2 = 0; __asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2)); @@ -220,10 +292,12 @@ static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info0[4] = {0, 0, 0, 0}; int cpu_info1[4] = {0, 0, 0, 0}; int cpu_info7[4] = {0, 0, 0, 0}; + int cpu_einfo7[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); + CpuId(7, 1, cpu_einfo7); } cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | @@ -236,7 +310,9 @@ static SAFEBUFFERS int GetCpuFlags(void) { ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | - ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); + ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0) | + ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) | + ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0); // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { @@ -246,8 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0; cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0; cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0; - cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0; - cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0; + cpu_info |= (cpu_einfo7[3] & 0x00080000) ? 
kCpuHasAVX10 : 0; } } #endif @@ -277,6 +352,10 @@ static SAFEBUFFERS int GetCpuFlags(void) { #endif cpu_info |= kCpuHasARM; #endif // __arm__ +#if defined(__riscv) && defined(__linux__) + cpu_info = RiscvCpuCaps("/proc/cpuinfo"); + cpu_info |= kCpuHasRISCV; +#endif // __riscv cpu_info |= kCpuInitialized; return cpu_info; } diff --git a/files/source/mjpeg_decoder.cc b/source/mjpeg_decoder.cc index 4ccf00a3..0141da8a 100644 --- a/files/source/mjpeg_decoder.cc +++ b/source/mjpeg_decoder.cc @@ -109,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) { } buf_.data = src; - buf_.len = static_cast<int>(src_len); + buf_.len = (int)src_len; buf_vec_.pos = 0; decompress_struct_->client_data = &buf_vec_; #ifdef HAVE_SETJMP @@ -428,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) { void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT jpeg_source_mgr* src = cinfo->src; - size_t bytes = static_cast<size_t>(num_bytes); + size_t bytes = (size_t)num_bytes; if (bytes > src->bytes_in_buffer) { src->next_input_byte = nullptr; src->bytes_in_buffer = 0; diff --git a/files/source/mjpeg_validate.cc b/source/mjpeg_validate.cc index ba0a03ab..ba0a03ab 100644 --- a/files/source/mjpeg_validate.cc +++ b/source/mjpeg_validate.cc diff --git a/files/source/planar_functions.cc b/source/planar_functions.cc index 169d4a8f..1c94e260 100644 --- a/files/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -75,6 +75,11 @@ void CopyPlane(const uint8_t* src_y, CopyRow = IS_ALIGNED(width, 32) ? 
CopyRow_NEON : CopyRow_Any_NEON; } #endif +#if defined(HAS_COPYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + CopyRow = CopyRow_RVV; + } +#endif // Copy plane for (y = 0; y < height; ++y) { @@ -162,7 +167,7 @@ void Convert8To16Plane(const uint8_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, - int scale, // 16384 for 10 bits + int scale, // 1024 for 10 bits int width, int height) { int y; @@ -333,6 +338,45 @@ int I210Copy(const uint16_t* src_y, return 0; } +// Copy I410. +LIBYUV_API +int I410Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + if (dst_y) { + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; +} + // Copy I400. LIBYUV_API int I400ToI400(const uint8_t* src_y, @@ -385,6 +429,7 @@ int I420ToI400(const uint8_t* src_y, } // Copy NV12. Supports inverting. +LIBYUV_API int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -418,6 +463,7 @@ int NV12Copy(const uint8_t* src_y, } // Copy NV21. Supports inverting. 
+LIBYUV_API int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, @@ -504,6 +550,11 @@ void SplitUVPlane(const uint8_t* src_uv, } } #endif +#if defined(HAS_SPLITUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitUVRow = SplitUVRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Copy a row of UV. @@ -553,11 +604,19 @@ void MergeUVPlane(const uint8_t* src_u, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(width, 16)) { MergeUVRow = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 32)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow = MergeUVRow_Any_NEON; @@ -582,6 +641,11 @@ void MergeUVPlane(const uint8_t* src_u, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of UV. @@ -687,7 +751,7 @@ void MergeUVPlane_16(const uint16_t* src_u, #if defined(HAS_MERGEUVROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_16 = MergeUVRow_16_Any_AVX2; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { MergeUVRow_16 = MergeUVRow_16_AVX2; } } @@ -911,31 +975,31 @@ int NV21ToNV12(const uint8_t* src_y, return 0; } +// Test if tile_height is a power of 2 (16 or 32) +#define IS_POWEROFTWO(x) (!((x) & ((x)-1))) + // Detile a plane of data // tile width is 16 and assumed. // tile_height is 16 or 32 for MM21. // src_stride_y is bytes per row of source ignoring tiling. e.g. 640 // TODO: More detile row functions. 
- LIBYUV_API -void DetilePlane(const uint8_t* src_y, - int src_stride_y, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height, - int tile_height) { +int DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { const ptrdiff_t src_tile_stride = 16 * tile_height; int y; void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width) = DetileRow_C; - assert(src_stride_y >= 0); - assert(tile_height > 0); - assert(src_stride_y > 0); - - if (width <= 0 || height == 0) { - return; + if (!src_y || !dst_y || width <= 0 || height == 0 || + !IS_POWEROFTWO(tile_height)) { + return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; @@ -970,6 +1034,72 @@ void DetilePlane(const uint8_t* src_y, src_y = src_y - src_tile_stride + src_stride_y * tile_height; } } + return 0; +} + +// Convert a plane of 16 bit tiles of 16 x H to linear. +// tile width is 16 and assumed. +// tile_height is 16 or 32 for MT2T. +LIBYUV_API +int DetilePlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { + const ptrdiff_t src_tile_stride = 16 * tile_height; + int y; + void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride, + uint16_t* dst, int width) = DetileRow_16_C; + if (!src_y || !dst_y || width <= 0 || height == 0 || + !IS_POWEROFTWO(tile_height)) { + return -1; + } + + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + +#if defined(HAS_DETILEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + DetileRow_16 = DetileRow_16_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_SSE2; + } + } +#endif +#if defined(HAS_DETILEROW_16_AVX) + if (TestCpuFlag(kCpuHasAVX)) { + DetileRow_16 = DetileRow_16_Any_AVX; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_AVX; + } + } +#endif +#if defined(HAS_DETILEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DetileRow_16 = DetileRow_16_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_NEON; + } + } +#endif + + // Detile plane + for (y = 0; y < height; ++y) { + DetileRow_16(src_y, src_tile_stride, dst_y, width); + dst_y += dst_stride_y; + src_y += 16; + // Advance to next row of tiles. + if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_y = src_y - src_tile_stride + src_stride_y * tile_height; + } + } + return 0; } LIBYUV_API @@ -1033,6 +1163,74 @@ void DetileSplitUVPlane(const uint8_t* src_uv, } } +LIBYUV_API +void DetileToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height, + int tile_height) { + const ptrdiff_t src_y_tile_stride = 16 * tile_height; + const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2; + int y; + void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, int width) = DetileToYUY2_C; + assert(src_stride_y >= 0); + assert(src_stride_y > 0); + assert(src_stride_uv >= 0); + assert(src_stride_uv > 0); + assert(tile_height > 0); + + if (width <= 0 || height == 0 || tile_height <= 0) { + return; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; + } + +#if defined(HAS_DETILETOYUY2_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DetileToYUY2 = DetileToYUY2_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileToYUY2 = DetileToYUY2_NEON; + } + } +#endif + +#if defined(HAS_DETILETOYUY2_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + DetileToYUY2 = DetileToYUY2_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + DetileToYUY2 = DetileToYUY2_SSE2; + } + } +#endif + + // Detile plane + for (y = 0; y < height; ++y) { + DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, + width); + dst_yuy2 += dst_stride_yuy2; + src_y += 16; + + if (y & 0x1) + src_uv += 16; + + // Advance to next row of tiles. + if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_y = src_y - src_y_tile_stride + src_stride_y * tile_height; + src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2); + } + } +} + // Support function for NV12 etc RGB channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API @@ -1085,6 +1283,11 @@ void SplitRGBPlane(const uint8_t* src_rgb, } } #endif +#if defined(HAS_SPLITRGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitRGBRow = SplitRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Copy a row of RGB. @@ -1144,6 +1347,11 @@ void MergeRGBPlane(const uint8_t* src_r, } } #endif +#if defined(HAS_MERGERGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeRGBRow = MergeRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of RGB. 
@@ -1156,18 +1364,18 @@ void MergeRGBPlane(const uint8_t* src_r, } LIBYUV_NOINLINE -void SplitARGBPlaneAlpha(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - uint8_t* dst_a, - int dst_stride_a, - int width, - int height) { +static void SplitARGBPlaneAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { int y; void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, uint8_t* dst_a, int width) = @@ -1175,6 +1383,9 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_argb == width * 4 && dst_stride_r == width && dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) { width *= height; @@ -1215,6 +1426,11 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, } } #endif +#if defined(HAS_SPLITARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitARGBRow = SplitARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width); @@ -1227,21 +1443,24 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, } LIBYUV_NOINLINE -void SplitARGBPlaneOpaque(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - int width, - int height) { +static void SplitARGBPlaneOpaque(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int y; void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitXRGBRow_C; assert(height > 0); + 
if (width <= 0 || height == 0) { + return; + } if (src_stride_argb == width * 4 && dst_stride_r == width && dst_stride_g == width && dst_stride_b == width) { width *= height; @@ -1281,6 +1500,11 @@ void SplitARGBPlaneOpaque(const uint8_t* src_argb, } } #endif +#if defined(HAS_SPLITXRGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitXRGBRow = SplitXRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width); @@ -1328,18 +1552,18 @@ void SplitARGBPlane(const uint8_t* src_argb, } LIBYUV_NOINLINE -void MergeARGBPlaneAlpha(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - const uint8_t* src_a, - int src_stride_a, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneAlpha(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, const uint8_t* src_a, @@ -1347,6 +1571,9 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && src_stride_a == width && dst_stride_argb == width * 4) { width *= height; @@ -1378,6 +1605,11 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, } } #endif +#if defined(HAS_MERGEARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeARGBRow = MergeARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width); @@ -1390,16 +1622,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, } LIBYUV_NOINLINE -void MergeARGBPlaneOpaque(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - 
const uint8_t* src_b, - int src_stride_b, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneOpaque(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_argb, int width) = @@ -1407,6 +1639,9 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r, assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && dst_stride_argb == width * 4) { width *= height; @@ -1437,6 +1672,11 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r, } } #endif +#if defined(HAS_MERGEXRGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeXRGBRow = MergeXRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { MergeXRGBRow(src_r, src_g, src_b, dst_argb, width); @@ -1888,6 +2128,16 @@ int YUY2ToI422(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + YUY2ToYRow = YUY2ToYRow_Any_LSX; + YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + YUY2ToYRow = YUY2ToYRow_LSX; + YUY2ToUV422Row = YUY2ToUV422Row_LSX; + } + } +#endif #if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { YUY2ToYRow = YUY2ToYRow_Any_LASX; @@ -1984,6 +2234,16 @@ int UYVYToI422(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + UYVYToYRow = UYVYToYRow_Any_LSX; + UYVYToUV422Row = UYVYToUV422Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_LSX; + UYVYToUV422Row = UYVYToUV422Row_LSX; + } + } +#endif #if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) 
{ UYVYToYRow = UYVYToYRow_Any_LASX; @@ -2131,6 +2391,14 @@ int UYVYToY(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + UYVYToYRow = UYVYToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { UYVYToYRow(src_uyvy, dst_y, width); @@ -2189,6 +2457,14 @@ void MirrorPlane(const uint8_t* src_y, } } #endif +#if defined(HAS_MIRRORROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MirrorRow = MirrorRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + MirrorRow = MirrorRow_LSX; + } + } +#endif #if defined(HAS_MIRRORROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { MirrorRow = MirrorRow_Any_LASX; @@ -2255,6 +2531,14 @@ void MirrorUVPlane(const uint8_t* src_uv, } } #endif +#if defined(HAS_MIRRORUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MirrorUVRow = MirrorUVRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + MirrorUVRow = MirrorUVRow_LSX; + } + } +#endif #if defined(HAS_MIRRORUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { MirrorUVRow = MirrorUVRow_Any_LASX; @@ -2427,6 +2711,14 @@ int ARGBMirror(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBMIRRORROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBMirrorRow = ARGBMirrorRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBMirrorRow = ARGBMirrorRow_LSX; + } + } +#endif #if defined(HAS_ARGBMIRRORROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBMirrorRow = ARGBMirrorRow_Any_LASX; @@ -2491,37 +2783,6 @@ int RGB24Mirror(const uint8_t* src_rgb24, return 0; } -// Get a blender that optimized for the CPU and pixel count. -// As there are 6 blenders to choose from, the caller should try to use -// the same blend function for all pixels if possible. 
-LIBYUV_API -ARGBBlendRow GetARGBBlend() { - void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, - uint8_t* dst_argb, int width) = ARGBBlendRow_C; -#if defined(HAS_ARGBBLENDROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBBlendRow = ARGBBlendRow_SSSE3; - return ARGBBlendRow; - } -#endif -#if defined(HAS_ARGBBLENDROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBBlendRow = ARGBBlendRow_NEON; - } -#endif -#if defined(HAS_ARGBBLENDROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBBlendRow = ARGBBlendRow_MSA; - } -#endif -#if defined(HAS_ARGBBLENDROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBBlendRow = ARGBBlendRow_LSX; - } -#endif - return ARGBBlendRow; -} - // Alpha Blend 2 ARGB images and store to destination. LIBYUV_API int ARGBBlend(const uint8_t* src_argb0, @@ -2534,7 +2795,7 @@ int ARGBBlend(const uint8_t* src_argb0, int height) { int y; void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, - uint8_t* dst_argb, int width) = GetARGBBlend(); + uint8_t* dst_argb, int width) = ARGBBlendRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -2551,7 +2812,31 @@ int ARGBBlend(const uint8_t* src_argb0, height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } - +#if defined(HAS_ARGBBLENDROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBBlendRow = ARGBBlendRow_SSSE3; + } +#endif +#if defined(HAS_ARGBBLENDROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBBlendRow = ARGBBlendRow_NEON; + } +#endif +#if defined(HAS_ARGBBLENDROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBBlendRow = ARGBBlendRow_MSA; + } +#endif +#if defined(HAS_ARGBBLENDROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBBlendRow = ARGBBlendRow_LSX; + } +#endif +#if defined(HAS_ARGBBLENDROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBBlendRow = ARGBBlendRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); src_argb0 += src_stride_argb0; @@ -2611,6 +2896,11 @@ int 
BlendPlane(const uint8_t* src_y0, } } #endif +#if defined(HAS_BLENDPLANEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + BlendPlaneRow = BlendPlaneRow_RVV; + } +#endif for (y = 0; y < height; ++y) { BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width); @@ -2688,6 +2978,11 @@ int I420Blend(const uint8_t* src_y0, } } #endif +#if defined(HAS_BLENDPLANEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + BlendPlaneRow = BlendPlaneRow_RVV; + } +#endif if (!IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_C; } @@ -2724,9 +3019,16 @@ int I420Blend(const uint8_t* src_y0, } } #endif +#if defined(HAS_SCALEROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowDown2 = ScaleRowDown2Box_RVV; + } +#endif // Row buffer for intermediate alpha pixels. align_buffer_64(halfalpha, halfwidth); + if (!halfalpha) + return 1; for (y = 0; y < height; y += 2) { // last row of odd height image use 1 row of alpha instead of 2. if (y == (height - 1)) { @@ -2809,6 +3111,14 @@ int ARGBMultiply(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBMULTIPLYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBMultiplyRow = ARGBMultiplyRow_LSX; + } + } +#endif #if defined(HAS_ARGBMULTIPLYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX; @@ -2894,6 +3204,14 @@ int ARGBAdd(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBADDROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBAddRow = ARGBAddRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBAddRow = ARGBAddRow_LSX; + } + } +#endif #if defined(HAS_ARGBADDROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBAddRow = ARGBAddRow_Any_LASX; @@ -2974,6 +3292,14 @@ int ARGBSubtract(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBSUBTRACTROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBSubtractRow = ARGBSubtractRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBSubtractRow = ARGBSubtractRow_LSX; + } + } +#endif #if defined(HAS_ARGBSUBTRACTROW_LASX) if 
(TestCpuFlag(kCpuHasLASX)) { ARGBSubtractRow = ARGBSubtractRow_Any_LASX; @@ -3051,6 +3377,11 @@ int RAWToRGB24(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToRGB24Row = RAWToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToRGB24Row(src_raw, dst_rgb24, width); @@ -3060,6 +3391,7 @@ int RAWToRGB24(const uint8_t* src_raw, return 0; } +// TODO(fbarchard): Consider uint8_t value LIBYUV_API void SetPlane(uint8_t* dst_y, int dst_stride_y, @@ -3067,7 +3399,7 @@ void SetPlane(uint8_t* dst_y, int height, uint32_t value) { int y; - void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; + void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C; if (width <= 0 || height == 0) { return; @@ -3120,7 +3452,7 @@ void SetPlane(uint8_t* dst_y, // Set plane for (y = 0; y < height; ++y) { - SetRow(dst_y, value, width); + SetRow(dst_y, (uint8_t)value, width); dst_y += dst_stride_y; } } @@ -3168,7 +3500,7 @@ int ARGBRect(uint8_t* dst_argb, int height, uint32_t value) { int y; - void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) = + void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) = ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -3293,6 +3625,14 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBATTENUATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_LSX; + } + } +#endif #if defined(HAS_ARGBATTENUATEROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX; @@ -3301,6 +3641,11 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBAttenuateRow(src_argb, dst_argb, width); @@ -3401,6 
+3746,11 @@ int ARGBGrayTo(const uint8_t* src_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif +#if defined(HAS_ARGBGRAYROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBGrayRow = ARGBGrayRow_LSX; + } +#endif #if defined(HAS_ARGBGRAYROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { ARGBGrayRow = ARGBGrayRow_LASX; @@ -3451,6 +3801,11 @@ int ARGBGray(uint8_t* dst_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif +#if defined(HAS_ARGBGRAYROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBGrayRow = ARGBGrayRow_LSX; + } +#endif #if defined(HAS_ARGBGRAYROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { ARGBGrayRow = ARGBGrayRow_LASX; @@ -3473,7 +3828,7 @@ int ARGBSepia(uint8_t* dst_argb, int width, int height) { int y; - void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C; + void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -3499,6 +3854,11 @@ int ARGBSepia(uint8_t* dst_argb, ARGBSepiaRow = ARGBSepiaRow_MSA; } #endif +#if defined(HAS_ARGBSEPIAROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBSepiaRow = ARGBSepiaRow_LSX; + } +#endif #if defined(HAS_ARGBSEPIAROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { ARGBSepiaRow = ARGBSepiaRow_LASX; @@ -3616,7 +3976,7 @@ int ARGBColorTable(uint8_t* dst_argb, int width, int height) { int y; - void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, + void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb, int width) = ARGBColorTableRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || @@ -3652,7 +4012,7 @@ int RGBColorTable(uint8_t* dst_argb, int width, int height) { int y; - void (*RGBColorTableRow)(uint8_t * dst_argb, const 
uint8_t* table_argb, + void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb, int width) = RGBColorTableRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || @@ -3697,7 +4057,7 @@ int ARGBQuantize(uint8_t* dst_argb, int width, int height) { int y; - void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size, + void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) = ARGBQuantizeRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || @@ -3924,6 +4284,11 @@ int ARGBShade(const uint8_t* src_argb, ARGBShadeRow = ARGBShadeRow_MSA; } #endif +#if defined(HAS_ARGBSHADEROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) { + ARGBShadeRow = ARGBShadeRow_LSX; + } +#endif #if defined(HAS_ARGBSHADEROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) { ARGBShadeRow = ARGBShadeRow_LASX; @@ -3950,7 +4315,7 @@ int InterpolatePlane(const uint8_t* src0, int height, int interpolation) { int y; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { @@ -4008,6 +4373,11 @@ int InterpolatePlane(const uint8_t* src0, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { InterpolateRow(dst, src0, src1 - src0, width, interpolation); @@ -4030,7 +4400,7 @@ int InterpolatePlane_16(const uint16_t* src0, int height, int interpolation) { int y; - void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr, + void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t 
src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { @@ -4213,6 +4583,14 @@ int ARGBShuffle(const uint8_t* src_bgra, } } #endif +#if defined(HAS_ARGBSHUFFLEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBShuffleRow = ARGBShuffleRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBShuffleRow = ARGBShuffleRow_LSX; + } + } +#endif #if defined(HAS_ARGBSHUFFLEROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBShuffleRow = ARGBShuffleRow_Any_LASX; @@ -4334,6 +4712,8 @@ int GaussPlane_F32(const float* src, { // 2 pixels on each side, but aligned out to 16 bytes. align_buffer_64(rowbuf, (4 + width + 4) * 4); + if (!rowbuf) + return 1; memset(rowbuf, 0, 16); memset(rowbuf + (4 + width) * 4, 0, 16); float* row = (float*)(rowbuf + 16); @@ -4444,6 +4824,11 @@ static int ARGBSobelize(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif #if defined(HAS_SOBELYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { @@ -4477,16 +4862,18 @@ static int ARGBSobelize(const uint8_t* src_argb, #endif { // 3 rows with edges before/after. - const int kRowSize = (width + kEdge + 31) & ~31; - align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); + const int row_size = (width + kEdge + 31) & ~31; + align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge)); uint8_t* row_sobelx = rows; - uint8_t* row_sobely = rows + kRowSize; - uint8_t* row_y = rows + kRowSize * 2; + uint8_t* row_sobely = rows + row_size; + uint8_t* row_y = rows + row_size * 2; // Convert first row. uint8_t* row_y0 = row_y + kEdge; - uint8_t* row_y1 = row_y0 + kRowSize; - uint8_t* row_y2 = row_y1 + kRowSize; + uint8_t* row_y1 = row_y0 + row_size; + uint8_t* row_y2 = row_y1 + row_size; + if (!rows) + return 1; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. 
@@ -4967,6 +5354,11 @@ int ARGBExtractAlpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_LSX; } #endif +#if defined(HAS_ARGBEXTRACTALPHAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV; + } +#endif for (int y = 0; y < height; ++y) { ARGBExtractAlphaRow(src_argb, dst_a, width); @@ -5018,6 +5410,11 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, } } #endif +#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBCopyYToAlphaRow(src_y, dst_argb, width); @@ -5027,9 +5424,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, return 0; } -// TODO(fbarchard): Consider if width is even Y channel can be split -// directly. A SplitUVRow_Odd function could copy the remaining chroma. - LIBYUV_API int YUY2ToNV12(const uint8_t* src_yuy2, int src_stride_yuy2, @@ -5040,13 +5434,10 @@ int YUY2ToNV12(const uint8_t* src_yuy2, int width, int height) { int y; - int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, - int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) = InterpolateRow_C; - + void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = + YUY2ToYRow_C; + void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2, + uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C; if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -5057,109 +5448,91 @@ int YUY2ToNV12(const uint8_t* src_yuy2, src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } -#if defined(HAS_SPLITUVROW_SSE2) +#if defined(HAS_YUY2TOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - SplitUVRow = SplitUVRow_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_SSE2; + YUY2ToYRow = 
YUY2ToYRow_SSE2; } } #endif -#if defined(HAS_SPLITUVROW_AVX2) +#if defined(HAS_YUY2TOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - SplitUVRow = SplitUVRow_Any_AVX2; + YUY2ToYRow = YUY2ToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_AVX2; + YUY2ToYRow = YUY2ToYRow_AVX2; } } #endif -#if defined(HAS_SPLITUVROW_NEON) +#if defined(HAS_YUY2TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - SplitUVRow = SplitUVRow_Any_NEON; + YUY2ToYRow = YUY2ToYRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_NEON; + YUY2ToYRow = YUY2ToYRow_NEON; } } #endif -#if defined(HAS_SPLITUVROW_MSA) +#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - SplitUVRow = SplitUVRow_Any_MSA; + YUY2ToYRow = YUY2ToYRow_Any_MSA; if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_MSA; + YUY2ToYRow = YUY2ToYRow_MSA; } } #endif -#if defined(HAS_SPLITUVROW_LSX) +#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { - SplitUVRow = SplitUVRow_Any_LSX; - if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_LSX; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_SSSE3; + YUY2ToYRow = YUY2ToYRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - InterpolateRow = InterpolateRow_SSSE3; + YUY2ToYRow = YUY2ToYRow_LSX; } } #endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_AVX2; +#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + YUY2ToYRow = YUY2ToYRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_AVX2; + YUY2ToYRow = YUY2ToYRow_LASX; } } #endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_NEON; + +#if defined(HAS_YUY2TONVUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2; if 
(IS_ALIGNED(width, 16)) { - InterpolateRow = InterpolateRow_NEON; + YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2; } } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; +#if defined(HAS_YUY2TONVUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_MSA; + YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2; } } #endif -#if defined(HAS_INTERPOLATEROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - InterpolateRow = InterpolateRow_Any_LSX; - if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_LSX; +#if defined(HAS_YUY2TONVUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_NEON; } } #endif - { - int awidth = halfwidth * 2; - // row of y and 2 rows of uv - align_buffer_64(rows, awidth * 3); - - for (y = 0; y < height - 1; y += 2) { - // Split Y from UV. - SplitUVRow(src_yuy2, rows, rows + awidth, awidth); - memcpy(dst_y, rows, width); - SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth); - memcpy(dst_y + dst_stride_y, rows, width); - InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128); - src_yuy2 += src_stride_yuy2 * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; - } - if (height & 1) { - // Split Y from UV. 
- SplitUVRow(src_yuy2, rows, dst_uv, awidth); - memcpy(dst_y, rows, width); - } - free_aligned_buffer_64(rows); + for (y = 0; y < height - 1; y += 2) { + YUY2ToYRow(src_yuy2, dst_y, width); + YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width); + YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width); + src_yuy2 += src_stride_yuy2 * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + YUY2ToYRow(src_yuy2, dst_y, width); + YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width); } return 0; } @@ -5177,7 +5550,7 @@ int UYVYToNV12(const uint8_t* src_uyvy, int halfwidth = (width + 1) >> 1; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -5231,6 +5604,12 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_SPLITUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitUVRow = SplitUVRow_RVV; + } +#endif + #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -5271,11 +5650,18 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif { int awidth = halfwidth * 2; // row of y and 2 rows of uv align_buffer_64(rows, awidth * 3); + if (!rows) + return 1; for (y = 0; y < height - 1; y += 2) { // Split Y from UV. @@ -5336,6 +5722,7 @@ void HalfMergeUVPlane(const uint8_t* src_u, HalfMergeUVRow = HalfMergeUVRow_AVX2; } #endif + for (y = 0; y < height - 1; y += 2) { // Merge a row of U and V into a row of UV. 
HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width); diff --git a/files/source/rotate.cc b/source/rotate.cc index f1e83cbd..3f8332c3 100644 --- a/files/source/rotate.cc +++ b/source/rotate.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <assert.h> + #include "libyuv/rotate.h" #include "libyuv/convert.h" @@ -138,8 +140,11 @@ void RotatePlane180(const uint8_t* src, int dst_stride, int width, int height) { - // Swap first and last row and mirror the content. Uses a temporary row. + // Swap top and bottom row and mirror the content. Uses a temporary row. align_buffer_64(row, width); + assert(row); + if (!row) + return; const uint8_t* src_bot = src + src_stride * (height - 1); uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; @@ -178,6 +183,14 @@ void RotatePlane180(const uint8_t* src, } } #endif +#if defined(HAS_MIRRORROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MirrorRow = MirrorRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + MirrorRow = MirrorRow_LSX; + } + } +#endif #if defined(HAS_MIRRORROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { MirrorRow = MirrorRow_Any_LASX; @@ -206,12 +219,17 @@ void RotatePlane180(const uint8_t* src, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif +#if defined(HAS_COPYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + CopyRow = CopyRow_RVV; + } +#endif // Odd height will harmlessly mirror the middle row twice. 
for (y = 0; y < half_height; ++y) { - CopyRow(src, row, width); // Copy first row into buffer - MirrorRow(src_bot, dst, width); // Mirror last row into first row - MirrorRow(row, dst_bot, width); // Mirror buffer into last row + CopyRow(src, row, width); // Copy top row into buffer + MirrorRow(src_bot, dst, width); // Mirror bottom row into top row + MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row src += src_stride; dst += dst_stride; src_bot -= src_stride; @@ -476,6 +494,124 @@ int RotatePlane(const uint8_t* src, return -1; } +static void TransposePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + int i = height; + // Work across the source in 8x8 tiles + while (i >= 8) { + TransposeWx8_16_C(src, src_stride, dst, dst_stride, width); + src += 8 * src_stride; // Go down 8 rows. + dst += 8; // Move over 8 columns. + i -= 8; + } + + if (i > 0) { + TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, i); + } +} + +static void RotatePlane90_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + // Rotate by 90 is a transpose with the source read + // from bottom to top. So set the source pointer to the end + // of the buffer and flip the sign of the source stride. + src += src_stride * (height - 1); + src_stride = -src_stride; + TransposePlane_16(src, src_stride, dst, dst_stride, width, height); +} + +static void RotatePlane270_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + // Rotate by 270 is a transpose with the destination written + // from bottom to top. So set the destination pointer to the end + // of the buffer and flip the sign of the destination stride. 
+ dst += dst_stride * (width - 1); + dst_stride = -dst_stride; + TransposePlane_16(src, src_stride, dst, dst_stride, width, height); +} + +static void RotatePlane180_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + const uint16_t* src_bot = src + src_stride * (height - 1); + uint16_t* dst_bot = dst + dst_stride * (height - 1); + int half_height = (height + 1) >> 1; + int y; + + // Swap top and bottom row and mirror the content. Uses a temporary row. + align_buffer_64(row, width * 2); + uint16_t* row_tmp = (uint16_t*)row; + assert(row); + if (!row) + return; + + // Odd height will harmlessly mirror the middle row twice. + for (y = 0; y < half_height; ++y) { + CopyRow_16_C(src, row_tmp, width); // Copy top row into buffer + MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row + MirrorRow_16_C(row_tmp, dst_bot, width); // Mirror buffer into bottom row + src += src_stride; + dst += dst_stride; + src_bot -= src_stride; + dst_bot -= dst_stride; + } + free_aligned_buffer_64(row); +} + +LIBYUV_API +int RotatePlane_16(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height, + enum RotationMode mode) { + if (!src || width <= 0 || height == 0 || !dst) { + return -1; + } + + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src = src + (height - 1) * src_stride; + src_stride = -src_stride; + } + + switch (mode) { + case kRotate0: + // copy frame + CopyPlane_16(src, src_stride, dst, dst_stride, width, height); + return 0; + case kRotate90: + RotatePlane90_16(src, src_stride, dst, dst_stride, width, height); + return 0; + case kRotate270: + RotatePlane270_16(src, src_stride, dst, dst_stride, width, height); + return 0; + case kRotate180: + RotatePlane180_16(src, src_stride, dst, dst_stride, width, height); + return 0; + default: + break; + } + return -1; +} + LIBYUV_API int I420Rotate(const uint8_t* src_y, int src_stride_y, @@ -544,6 +680,8 @@ int I420Rotate(const uint8_t* src_y, return -1; } +// I422 has half width x full height UV planes, so rotate by 90 and 270 +// require scaling to maintain 422 subsampling. LIBYUV_API int I422Rotate(const uint8_t* src_y, int src_stride_y, @@ -562,6 +700,7 @@ int I422Rotate(const uint8_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; + int r; if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; @@ -579,31 +718,54 @@ int I422Rotate(const uint8_t* src_y, switch (mode) { case kRotate0: - // copy frame + // Copy frame CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); return 0; + + // Note on temporary Y plane for UV. + // Rotation of UV first fits within the Y destination plane rows. + // Y plane is width x height + // Y plane rotated is height x width + // UV plane is (width / 2) x height + // UV plane rotated is height x (width / 2) + // UV plane rotated+scaled is (height / 2) x width. + // UV plane rotated is a temporary that fits within the Y plane rotated. 
+ case kRotate90: - // We need to rotate and rescale, we use plane Y as temporal storage. - RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight, - halfheight, width, kFilterBilinear); - RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight, - halfheight, width, kFilterLinear); + RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } + RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; case kRotate270: - // We need to rotate and rescale, we use plane Y as temporal storage. 
- RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight, - halfheight, width, kFilterBilinear); - RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height); - ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight, - halfheight, width, kFilterLinear); + RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } + RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; case kRotate180: RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); @@ -828,6 +990,241 @@ int Android420ToI420Rotate(const uint8_t* src_y, return -1; } +LIBYUV_API +int I010Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotate0: + // copy frame + return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height); + case kRotate90: + RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); + return 0; + case kRotate270: + RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); + return 0; + case kRotate180: + RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); + return 0; + default: + break; + } + return -1; +} + +// I210 has half width x full height UV planes, so rotate by 90 and 270 +// require scaling to maintain 422 subsampling. 
+LIBYUV_API +int I210Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + int r; + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotate0: + // Copy frame + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); + return 0; + + // Note on temporary Y plane for UV. + // Rotation of UV first fits within the Y destination plane rows. + // Y plane is width x height + // Y plane rotated is height x width + // UV plane is (width / 2) x height + // UV plane rotated is height x (width / 2) + // UV plane rotated+scaled is (height / 2) x width. + // UV plane rotated is a temporary that fits within the Y plane rotated. 
+ + case kRotate90: + RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } + RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } + RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + return 0; + case kRotate270: + RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, + dst_stride_u, halfheight, width, kFilterBilinear); + if (r != 0) { + return r; + } + RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth, + height); + r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, + dst_stride_v, halfheight, width, kFilterLinear); + if (r != 0) { + return r; + } + RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + return 0; + case kRotate180: + RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + height); + RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + height); + return 0; + default: + break; + } + return -1; +} + +LIBYUV_API +int I410Rotate(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotate0: + // copy frame + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case kRotate90: + RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; + case kRotate270: + RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width, + height); + RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width, + height); + return 0; + case kRotate180: + RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width, + height); + RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width, + height); + RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width, + height); + return 0; + default: + break; + } + return -1; +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/source/rotate_any.cc b/source/rotate_any.cc index 88ca7876..88ca7876 100644 --- a/files/source/rotate_any.cc +++ b/source/rotate_any.cc diff --git a/files/source/rotate_argb.cc b/source/rotate_argb.cc index 539cf98d..d55fac4f 100644 --- a/files/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -8,11 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "libyuv/rotate.h" +#include "libyuv/rotate_argb.h" #include "libyuv/convert.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" #include "libyuv/row.h" #include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ @@ -68,6 +69,11 @@ static int ARGBTranspose(const uint8_t* src_argb, } } #endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV; + } +#endif for (i = 0; i < width; ++i) { // column of source to row of dest. ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height); @@ -114,7 +120,6 @@ static int ARGBRotate180(const uint8_t* src_argb, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. - align_buffer_64(row, width * 4); const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1); uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1); int half_height = (height + 1) >> 1; @@ -123,6 +128,9 @@ static int ARGBRotate180(const uint8_t* src_argb, ARGBMirrorRow_C; void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = CopyRow_C; + align_buffer_64(row, width * 4); + if (!row) + return 1; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; @@ -155,6 +163,14 @@ static int ARGBRotate180(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBMIRRORROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBMirrorRow = ARGBMirrorRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBMirrorRow = ARGBMirrorRow_LSX; + } + } +#endif #if defined(HAS_ARGBMIRRORROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBMirrorRow = ARGBMirrorRow_Any_LASX; @@ -183,6 +199,11 @@ static int ARGBRotate180(const uint8_t* src_argb, CopyRow = IS_ALIGNED(width * 4, 32) ? 
CopyRow_NEON : CopyRow_Any_NEON; } #endif +#if defined(HAS_COPYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + CopyRow = CopyRow_RVV; + } +#endif // Odd height will harmlessly mirror the middle row twice. for (y = 0; y < half_height; ++y) { diff --git a/source/rotate_common.cc b/source/rotate_common.cc new file mode 100644 index 00000000..e72608e9 --- /dev/null +++ b/source/rotate_common.cc @@ -0,0 +1,198 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/rotate_row.h" +#include "libyuv/row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +void TransposeWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst[0] = src[0 * src_stride]; + dst[1] = src[1 * src_stride]; + dst[2] = src[2 * src_stride]; + dst[3] = src[3 * src_stride]; + dst[4] = src[4 * src_stride]; + dst[5] = src[5 * src_stride]; + dst[6] = src[6 * src_stride]; + dst[7] = src[7 * src_stride]; + ++src; + dst += dst_stride; + } +} + +void TransposeUVWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst_a[0] = src[0 * src_stride + 0]; + dst_b[0] = src[0 * src_stride + 1]; + dst_a[1] = src[1 * src_stride + 0]; + dst_b[1] = src[1 * src_stride + 1]; + dst_a[2] = src[2 * src_stride + 0]; + dst_b[2] = src[2 * src_stride + 1]; + dst_a[3] = src[3 * src_stride + 0]; + dst_b[3] = src[3 * src_stride + 1]; + dst_a[4] = src[4 * src_stride + 0]; + dst_b[4] = src[4 * src_stride + 1]; + dst_a[5] = src[5 * src_stride + 0]; 
+ dst_b[5] = src[5 * src_stride + 1]; + dst_a[6] = src[6 * src_stride + 0]; + dst_b[6] = src[6 * src_stride + 1]; + dst_a[7] = src[7 * src_stride + 0]; + dst_b[7] = src[7 * src_stride + 1]; + src += 2; + dst_a += dst_stride_a; + dst_b += dst_stride_b; + } +} + +void TransposeWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height) { + int i; + for (i = 0; i < width; ++i) { + int j; + for (j = 0; j < height; ++j) { + dst[i * dst_stride + j] = src[j * src_stride + i]; + } + } +} + +void TransposeUVWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { + int i; + for (i = 0; i < width * 2; i += 2) { + int j; + for (j = 0; j < height; ++j) { + dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)]; + dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1]; + } + } +} + +void TransposeWx8_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst[0] = src[0 * src_stride]; + dst[1] = src[1 * src_stride]; + dst[2] = src[2 * src_stride]; + dst[3] = src[3 * src_stride]; + dst[4] = src[4 * src_stride]; + dst[5] = src[5 * src_stride]; + dst[6] = src[6 * src_stride]; + dst[7] = src[7 * src_stride]; + ++src; + dst += dst_stride; + } +} + +void TransposeWxH_16_C(const uint16_t* src, + int src_stride, + uint16_t* dst, + int dst_stride, + int width, + int height) { + int i; + for (i = 0; i < width; ++i) { + int j; + for (j = 0; j < height; ++j) { + dst[i * dst_stride + j] = src[j * src_stride + i]; + } + } +} + +// Transpose 32 bit values (ARGB) +void Transpose4x4_32_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + const uint8_t* src1 = src + src_stride; + const uint8_t* src2 = src1 + src_stride; + const uint8_t* src3 = src2 + src_stride; + uint8_t* dst1 = dst + dst_stride; + uint8_t* dst2 = dst1 
+ dst_stride; + uint8_t* dst3 = dst2 + dst_stride; + int i; + for (i = 0; i < width; i += 4) { + uint32_t p00 = ((uint32_t*)(src))[0]; + uint32_t p10 = ((uint32_t*)(src))[1]; + uint32_t p20 = ((uint32_t*)(src))[2]; + uint32_t p30 = ((uint32_t*)(src))[3]; + uint32_t p01 = ((uint32_t*)(src1))[0]; + uint32_t p11 = ((uint32_t*)(src1))[1]; + uint32_t p21 = ((uint32_t*)(src1))[2]; + uint32_t p31 = ((uint32_t*)(src1))[3]; + uint32_t p02 = ((uint32_t*)(src2))[0]; + uint32_t p12 = ((uint32_t*)(src2))[1]; + uint32_t p22 = ((uint32_t*)(src2))[2]; + uint32_t p32 = ((uint32_t*)(src2))[3]; + uint32_t p03 = ((uint32_t*)(src3))[0]; + uint32_t p13 = ((uint32_t*)(src3))[1]; + uint32_t p23 = ((uint32_t*)(src3))[2]; + uint32_t p33 = ((uint32_t*)(src3))[3]; + ((uint32_t*)(dst))[0] = p00; + ((uint32_t*)(dst))[1] = p01; + ((uint32_t*)(dst))[2] = p02; + ((uint32_t*)(dst))[3] = p03; + ((uint32_t*)(dst1))[0] = p10; + ((uint32_t*)(dst1))[1] = p11; + ((uint32_t*)(dst1))[2] = p12; + ((uint32_t*)(dst1))[3] = p13; + ((uint32_t*)(dst2))[0] = p20; + ((uint32_t*)(dst2))[1] = p21; + ((uint32_t*)(dst2))[2] = p22; + ((uint32_t*)(dst2))[3] = p23; + ((uint32_t*)(dst3))[0] = p30; + ((uint32_t*)(dst3))[1] = p31; + ((uint32_t*)(dst3))[2] = p32; + ((uint32_t*)(dst3))[3] = p33; + src += src_stride * 4; // advance 4 rows + src1 += src_stride * 4; + src2 += src_stride * 4; + src3 += src_stride * 4; + dst += 4 * 4; // advance 4 columns + dst1 += 4 * 4; + dst2 += 4 * 4; + dst3 += 4 * 4; + } +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/files/source/rotate_gcc.cc b/source/rotate_gcc.cc index 1a3f8cbb..fd5eee05 100644 --- a/files/source/rotate_gcc.cc +++ b/source/rotate_gcc.cc @@ -365,6 +365,136 @@ void TransposeUVWx8_SSE2(const uint8_t* src, "xmm7", "xmm8", "xmm9"); } #endif // defined(HAS_TRANSPOSEUVWX8_SSE2) + +#if defined(HAS_TRANSPOSE4X4_32_SSE2) +// 4 values, little endian view +// a b c d +// e f g h +// i j k l +// m n o p + +// transpose 2x2 +// a e b f from row 0, 
1 +// i m j n from row 2, 3 +// c g d h from row 0, 1 +// k o l p from row 2, 3 + +// transpose 4x4 +// a e i m from row 0, 1 +// b f j n from row 0, 1 +// c g k o from row 2, 3 +// d h l p from row 2, 3 + +// Transpose 32 bit values (ARGB) +void Transpose4x4_32_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + asm volatile( + // Main loop transpose 4x4. Read a column, write a row. + "1: \n" + "movdqu (%0),%%xmm0 \n" // a b c d + "movdqu (%0,%3),%%xmm1 \n" // e f g h + "lea (%0,%3,2),%0 \n" // src += stride * 2 + "movdqu (%0),%%xmm2 \n" // i j k l + "movdqu (%0,%3),%%xmm3 \n" // m n o p + "lea (%0,%3,2),%0 \n" // src += stride * 2 + + // Transpose 2x2 + "movdqa %%xmm0,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "movdqa %%xmm0,%%xmm6 \n" + "movdqa %%xmm2,%%xmm7 \n" + "punpckldq %%xmm1,%%xmm4 \n" // a e b f from row 0, 1 + "punpckldq %%xmm3,%%xmm5 \n" // i m j n from row 2, 3 + "punpckhdq %%xmm1,%%xmm6 \n" // c g d h from row 0, 1 + "punpckhdq %%xmm3,%%xmm7 \n" // k o l p from row 2, 3 + + // Transpose 4x4 + "movdqa %%xmm4,%%xmm0 \n" + "movdqa %%xmm4,%%xmm1 \n" + "movdqa %%xmm6,%%xmm2 \n" + "movdqa %%xmm6,%%xmm3 \n" + "punpcklqdq %%xmm5,%%xmm0 \n" // a e i m from row 0, 1 + "punpckhqdq %%xmm5,%%xmm1 \n" // b f j n from row 0, 1 + "punpcklqdq %%xmm7,%%xmm2 \n" // c g k o from row 2, 3 + "punpckhqdq %%xmm7,%%xmm3 \n" // d h l p from row 2, 3 + + "movdqu %%xmm0,(%1) \n" + "lea 16(%1,%4),%1 \n" // dst += stride + 16 + "movdqu %%xmm1,-16(%1) \n" + "movdqu %%xmm2,-16(%1,%4) \n" + "movdqu %%xmm3,-16(%1,%4,2) \n" + "sub %4,%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+rm"(width) // %2 + : "r"((ptrdiff_t)(src_stride)), // %3 + "r"((ptrdiff_t)(dst_stride)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} +#endif // defined(HAS_TRANSPOSE4X4_32_SSE2) + +#if defined(HAS_TRANSPOSE4X4_32_AVX2) + +// Transpose 32 bit values (ARGB) +void 
Transpose4x4_32_AVX2(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + asm volatile( + // Main loop transpose 2 blocks of 4x4. Read a column, write a row. + "1: \n" + "vmovdqu (%0),%%xmm0 \n" // a b c d + "vmovdqu (%0,%3),%%xmm1 \n" // e f g h + "lea (%0,%3,2),%0 \n" // src += stride * 2 + "vmovdqu (%0),%%xmm2 \n" // i j k l + "vmovdqu (%0,%3),%%xmm3 \n" // m n o p + "lea (%0,%3,2),%0 \n" // src += stride * 2 + + "vinserti128 $1,(%0),%%ymm0,%%ymm0 \n" // a b c d + "vinserti128 $1,(%0,%3),%%ymm1,%%ymm1 \n" // e f g h + "lea (%0,%3,2),%0 \n" // src += stride * 2 + "vinserti128 $1,(%0),%%ymm2,%%ymm2 \n" // i j k l + "vinserti128 $1,(%0,%3),%%ymm3,%%ymm3 \n" // m n o p + "lea (%0,%3,2),%0 \n" // src += stride * 2 + + // Transpose 2x2 + "vpunpckldq %%ymm1,%%ymm0,%%ymm4 \n" // a e b f from row 0, 1 + "vpunpckldq %%ymm3,%%ymm2,%%ymm5 \n" // i m j n from row 2, 3 + "vpunpckhdq %%ymm1,%%ymm0,%%ymm6 \n" // c g d h from row 0, 1 + "vpunpckhdq %%ymm3,%%ymm2,%%ymm7 \n" // k o l p from row 2, 3 + + // Transpose 4x4 + "vpunpcklqdq %%ymm5,%%ymm4,%%ymm0 \n" // a e i m from row 0, 1 + "vpunpckhqdq %%ymm5,%%ymm4,%%ymm1 \n" // b f j n from row 0, 1 + "vpunpcklqdq %%ymm7,%%ymm6,%%ymm2 \n" // c g k o from row 2, 3 + "vpunpckhqdq %%ymm7,%%ymm6,%%ymm3 \n" // d h l p from row 2, 3 + + "vmovdqu %%ymm0,(%1) \n" + "lea 32(%1,%4),%1 \n" // dst += stride + 32 + "vmovdqu %%ymm1,-32(%1) \n" + "vmovdqu %%ymm2,-32(%1,%4) \n" + "vmovdqu %%ymm3,-32(%1,%4,2) \n" + "sub %4,%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+rm"(width) // %2 + : "r"((ptrdiff_t)(src_stride)), // %3 + "r"((ptrdiff_t)(dst_stride)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} +#endif // defined(HAS_TRANSPOSE4X4_32_AVX2) + #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus diff --git a/files/source/rotate_lsx.cc b/source/rotate_lsx.cc index 94a2b91c..94a2b91c 100644 --- 
a/files/source/rotate_lsx.cc +++ b/source/rotate_lsx.cc diff --git a/files/source/rotate_msa.cc b/source/rotate_msa.cc index 99bdca65..99bdca65 100644 --- a/files/source/rotate_msa.cc +++ b/source/rotate_msa.cc diff --git a/files/source/rotate_neon.cc b/source/rotate_neon.cc index 844df2bf..569a7318 100644 --- a/files/source/rotate_neon.cc +++ b/source/rotate_neon.cc @@ -410,6 +410,46 @@ void TransposeUVWx8_NEON(const uint8_t* src, : "r"(&kVTbl4x4TransposeDi) // %8 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); } + +// Transpose 32 bit values (ARGB) +void Transpose4x4_32_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + const uint8_t* src1 = src + src_stride; + const uint8_t* src2 = src1 + src_stride; + const uint8_t* src3 = src2 + src_stride; + uint8_t* dst1 = dst + dst_stride; + uint8_t* dst2 = dst1 + dst_stride; + uint8_t* dst3 = dst2 + dst_stride; + asm volatile( + // Main loop transpose 4x4. Read a column, write a row. + "1: \n" + "vld4.32 {d0[0], d2[0], d4[0], d6[0]}, [%0], %9 \n" + "vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [%1], %9 \n" + "vld4.32 {d1[0], d3[0], d5[0], d7[0]}, [%2], %9 \n" + "vld4.32 {d1[1], d3[1], d5[1], d7[1]}, [%3], %9 \n" + "subs %8, %8, #4 \n" // w -= 4 + "vst1.8 {q0}, [%4]! \n" + "vst1.8 {q1}, [%5]! \n" + "vst1.8 {q2}, [%6]! \n" + "vst1.8 {q3}, [%7]! 
\n" + "bgt 1b \n" + + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(dst1), // %5 + "+r"(dst2), // %6 + "+r"(dst3), // %7 + "+r"(width) // %8 + : "r"((ptrdiff_t)(src_stride * 4)) // %9 + : "memory", "cc", "q0", "q1", "q2", "q3"); +} + #endif // defined(__ARM_NEON__) && !defined(__aarch64__) #ifdef __cplusplus diff --git a/files/source/rotate_neon64.cc b/source/rotate_neon64.cc index 43c15817..95047fa7 100644 --- a/files/source/rotate_neon64.cc +++ b/source/rotate_neon64.cc @@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src, "4: \n" - : "=&r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(dst), // %2 - "+r"(width) // %3 - : "r"(&kVTbl4x4Transpose), // %4 - "r"(static_cast<ptrdiff_t>(src_stride)), // %5 - "r"(static_cast<ptrdiff_t>(dst_stride)) // %6 + : "=&r"(src_temp), // %0 + "+r"(src), // %1 + "+r"(dst), // %2 + "+r"(width) // %3 + : "r"(&kVTbl4x4Transpose), // %4 + "r"((ptrdiff_t)src_stride), // %5 + "r"((ptrdiff_t)dst_stride) // %6 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } @@ -423,18 +423,57 @@ void TransposeUVWx8_NEON(const uint8_t* src, "4: \n" - : "=&r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(dst_a), // %2 - "+r"(dst_b), // %3 - "+r"(width) // %4 - : "r"(static_cast<ptrdiff_t>(src_stride)), // %5 - "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6 - "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7 - "r"(&kVTbl4x4TransposeDi) // %8 + : "=&r"(src_temp), // %0 + "+r"(src), // %1 + "+r"(dst_a), // %2 + "+r"(dst_b), // %3 + "+r"(width) // %4 + : "r"((ptrdiff_t)src_stride), // %5 + "r"((ptrdiff_t)dst_stride_a), // %6 + "r"((ptrdiff_t)dst_stride_b), // %7 + "r"(&kVTbl4x4TransposeDi) // %8 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31"); } + +// Transpose 32 bit values (ARGB) +void Transpose4x4_32_NEON(const uint8_t* src, + int 
src_stride, + uint8_t* dst, + int dst_stride, + int width) { + const uint8_t* src1 = src + src_stride; + const uint8_t* src2 = src1 + src_stride; + const uint8_t* src3 = src2 + src_stride; + uint8_t* dst1 = dst + dst_stride; + uint8_t* dst2 = dst1 + dst_stride; + uint8_t* dst3 = dst2 + dst_stride; + asm volatile( + // Main loop transpose 4x4. Read a column, write a row. + "1: \n" + "ld4 {v0.s, v1.s, v2.s, v3.s}[0], [%0], %9 \n" + "ld4 {v0.s, v1.s, v2.s, v3.s}[1], [%1], %9 \n" + "ld4 {v0.s, v1.s, v2.s, v3.s}[2], [%2], %9 \n" + "ld4 {v0.s, v1.s, v2.s, v3.s}[3], [%3], %9 \n" + "subs %w8, %w8, #4 \n" // w -= 4 + "st1 {v0.4s}, [%4], 16 \n" + "st1 {v1.4s}, [%5], 16 \n" + "st1 {v2.4s}, [%6], 16 \n" + "st1 {v3.4s}, [%7], 16 \n" + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(dst1), // %5 + "+r"(dst2), // %6 + "+r"(dst3), // %7 + "+r"(width) // %8 + : "r"((ptrdiff_t)(src_stride * 4)) // %9 + : "memory", "cc", "v0", "v1", "v2", "v3"); +} + #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus diff --git a/files/source/rotate_win.cc b/source/rotate_win.cc index a78873f8..a78873f8 100644 --- a/files/source/rotate_win.cc +++ b/source/rotate_win.cc diff --git a/files/source/row_any.cc b/source/row_any.cc index 3781a9f2..e574543c 100644 --- a/files/source/row_any.cc +++ b/source/row_any.cc @@ -19,7 +19,7 @@ namespace libyuv { extern "C" { #endif -// memset for temp is meant to clear the source buffer (not dest) so that +// memset for vin is meant to clear the source buffer so that // SIMD that reads full multiple of 16 bytes will not trigger msan errors. // memset is not needed for production, as the garbage values are processed but // not used, although there may be edge cases for subsampling. 
@@ -35,20 +35,20 @@ extern "C" { void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \ int width) { \ - SIMD_ALIGNED(uint8_t temp[64 * 5]); \ - memset(temp, 0, 64 * 4); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[64 * 4]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n); \ } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 192, a_buf + n, r); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ - SS(r, DUVSHIFT) * BPP); \ + memcpy(vin, y_buf + n, r); \ + memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(vin + 192, a_buf + n, r); \ + ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_MERGEARGBROW_SSE2 @@ -68,25 +68,25 @@ ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15) void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \ const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8_t temp[64 * 5]); \ - memset(temp, 0, 64 * 4); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[64 * 4]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 
192, a_buf + n, r); \ + memcpy(vin, y_buf + n, r); \ + memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(vin + 192, a_buf + n, r); \ if (width & 1) { \ - temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \ - temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ + vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1]; \ + vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \ } \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \ - yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ - SS(r, DUVSHIFT) * BPP); \ + ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants, \ + MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I444ALPHATOARGBROW_SSSE3 @@ -113,6 +113,9 @@ ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7) #ifdef HAS_I422ALPHATOARGBROW_MSA ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7) #endif +#ifdef HAS_I422ALPHATOARGBROW_LSX +ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15) +#endif #ifdef HAS_I422ALPHATOARGBROW_LASX ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15) #endif @@ -123,21 +126,20 @@ ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15) void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \ uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \ int width) { \ - SIMD_ALIGNED(T temp[16 * 4]); \ - SIMD_ALIGNED(uint8_t out[64]); \ - memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \ + SIMD_ALIGNED(T vin[16 * 4]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ } \ - memcpy(temp, y_buf + n, r * SBPP); \ - memcpy(temp + 16, 
u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 48, a_buf + n, r * SBPP); \ - ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants, \ - MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ + memcpy(vin, y_buf + n, r * SBPP); \ + memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(vin + 48, a_buf + n, r * SBPP); \ + ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I210ALPHATOARGBROW_SSSE3 @@ -190,20 +192,20 @@ ANY41CT(I410AlphaToARGBRow_Any_AVX2, #define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \ void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \ const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \ - SIMD_ALIGNED(STYPE temp[16 * 4]); \ - SIMD_ALIGNED(DTYPE out[64]); \ - memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \ + SIMD_ALIGNED(STYPE vin[16 * 4]); \ + SIMD_ALIGNED(DTYPE vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n); \ } \ - memcpy(temp, r_buf + n, r * SBPP); \ - memcpy(temp + 16, g_buf + n, r * SBPP); \ - memcpy(temp + 32, b_buf + n, r * SBPP); \ - memcpy(temp + 48, a_buf + n, r * SBPP); \ - ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \ - memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \ + memcpy(vin, r_buf + n, r * SBPP); \ + memcpy(vin + 16, g_buf + n, r * SBPP); \ + memcpy(vin + 32, b_buf + n, r * SBPP); \ + memcpy(vin + 48, a_buf + n, r * SBPP); \ + ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, depth, MASK + 1); \ + memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \ } #ifdef 
HAS_MERGEAR64ROW_AVX2 @@ -237,22 +239,22 @@ ANY41PT(MergeARGB16To8Row_Any_NEON, #undef ANY41PT // Any 3 planes to 1. -#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ - const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8_t temp[64 * 4]); \ - memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ - SS(r, DUVSHIFT) * BPP); \ +#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t vin[64 * 3]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ + } \ + memcpy(vin, y_buf + n, r); \ + memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + ANY_SIMD(vin, vin + 64, vin + 128, vout, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } // Merge functions. 
@@ -285,6 +287,9 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #ifdef HAS_I422TOYUY2ROW_MSA ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31) #endif +#ifdef HAS_I422TOYUY2ROW_LSX +ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15) +#endif #ifdef HAS_I422TOYUY2ROW_LASX ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31) #endif @@ -294,6 +299,9 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #ifdef HAS_I422TOUYVYROW_MSA ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31) #endif +#ifdef HAS_I422TOUYVYROW_LSX +ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15) +#endif #ifdef HAS_I422TOUYVYROW_LASX ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31) #endif @@ -308,28 +316,27 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) // Note that odd width replication includes 444 due to implementation // on arm that subsamples 444 to 422 internally. // Any 3 planes to 1 with yuvconstants -#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ - const uint8_t* v_buf, uint8_t* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 4]); \ - memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - if (width & 1) { \ - temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ - temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \ - } \ - ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \ - MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \ - SS(r, DUVSHIFT) * BPP); \ +#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, 
DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t vin[128 * 3]); \ + SIMD_ALIGNED(uint8_t vout[128]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(vin, y_buf + n, r); \ + memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + if (width & 1) { \ + vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \ + vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1]; \ + } \ + ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I422TOARGBROW_SSSE3 @@ -359,6 +366,9 @@ ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15) #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) #endif +#ifdef HAS_I444TORGB24ROW_SSSE3 +ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15) +#endif #ifdef HAS_I422TORGB24ROW_AVX2 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) #endif @@ -374,6 +384,9 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #ifdef HAS_I444TOARGBROW_AVX2 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif +#ifdef HAS_I444TORGB24ROW_AVX2 +ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31) +#endif #ifdef HAS_I422TOARGB4444ROW_AVX2 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15) #endif @@ -383,6 +396,9 @@ ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15) #ifdef HAS_I422TORGB565ROW_AVX2 ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15) #endif +#ifdef HAS_I444TORGB24ROW_NEON 
+ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7) +#endif #ifdef HAS_I422TOARGBROW_NEON ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) @@ -401,6 +417,14 @@ ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) #endif +#ifdef HAS_I422TOARGBROW_LSX +ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15) +ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15) +ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15) +ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15) +ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15) +ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15) +#endif #ifdef HAS_I422TOARGBROW_LASX ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31) ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31) @@ -420,19 +444,19 @@ ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15) void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \ uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \ int width) { \ - SIMD_ALIGNED(T temp[16 * 3]); \ - SIMD_ALIGNED(uint8_t out[64]); \ - memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ + SIMD_ALIGNED(T vin[16 * 3]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ } \ - memcpy(temp, y_buf + n, r * SBPP); \ - memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ + memcpy(vin, 
y_buf + n, r * SBPP); \ + memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I210TOAR30ROW_SSSE3 @@ -477,19 +501,19 @@ ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15) #define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \ void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \ DTYPE* dst_ptr, int depth, int width) { \ - SIMD_ALIGNED(STYPE temp[16 * 3]); \ - SIMD_ALIGNED(DTYPE out[64]); \ - memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ + SIMD_ALIGNED(STYPE vin[16 * 3]); \ + SIMD_ALIGNED(DTYPE vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n); \ } \ - memcpy(temp, r_buf + n, r * SBPP); \ - memcpy(temp + 16, g_buf + n, r * SBPP); \ - memcpy(temp + 32, b_buf + n, r * SBPP); \ - ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \ - memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \ + memcpy(vin, r_buf + n, r * SBPP); \ + memcpy(vin + 16, g_buf + n, r * SBPP); \ + memcpy(vin + 32, b_buf + n, r * SBPP); \ + ANY_SIMD(vin, vin + 16, vin + 32, vout, depth, MASK + 1); \ + memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \ } #ifdef HAS_MERGEXR30ROW_AVX2 @@ -541,18 +565,19 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON, #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 3]); \ - memset(temp, 0, 128 * 2); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[128 * 2]); \ + SIMD_ALIGNED(uint8_t vout[128]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ 
if (n > 0) { \ ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ + memcpy(vin, y_buf + n * SBPP, r * SBPP); \ + memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ SS(r, UVSHIFT) * SBPP2); \ - ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \ + ANY_SIMD(vin, vin + 128, vout, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } // Merge functions. @@ -560,7 +585,10 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON, ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) #endif #ifdef HAS_MERGEUVROW_AVX2 -ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31) +ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 15) +#endif +#ifdef HAS_MERGEUVROW_AVX512BW +ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31) #endif #ifdef HAS_MERGEUVROW_NEON ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) @@ -611,18 +639,27 @@ ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7) #ifdef HAS_ARGBMULTIPLYROW_MSA ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) #endif +#ifdef HAS_ARGBMULTIPLYROW_LSX +ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3) +#endif #ifdef HAS_ARGBMULTIPLYROW_LASX ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBADDROW_MSA ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) #endif +#ifdef HAS_ARGBADDROW_LSX +ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3) +#endif #ifdef HAS_ARGBADDROW_LASX ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBSUBTRACTROW_MSA ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) #endif +#ifdef HAS_ARGBSUBTRACTROW_LSX +ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3) +#endif #ifdef HAS_ARGBSUBTRACTROW_LASX ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7) #endif @@ 
-664,22 +701,53 @@ ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15) #endif #undef ANY21 +// Any 2 planes to 1 with stride +// width is measured in source pixels. 4 bytes contains 2 pixels +#define ANY21S(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, \ + int width) { \ + SIMD_ALIGNED(uint8_t vin[32 * 2]); \ + SIMD_ALIGNED(uint8_t vout[32]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int awidth = (width + 1) / 2; \ + int r = awidth & MASK; \ + int n = awidth & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2); \ + } \ + memcpy(vin, src_yuy2 + n * SBPP, r * SBPP); \ + memcpy(vin + 32, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP); \ + ANY_SIMD(vin, 32, vout, MASK + 1); \ + memcpy(dst_uv + n * BPP, vout, r * BPP); \ + } + +#ifdef HAS_YUY2TONVUVROW_NEON +ANY21S(YUY2ToNVUVRow_Any_NEON, YUY2ToNVUVRow_NEON, 4, 2, 7) +#endif +#ifdef HAS_YUY2TONVUVROW_SSE2 +ANY21S(YUY2ToNVUVRow_Any_SSE2, YUY2ToNVUVRow_SSE2, 4, 2, 7) +#endif +#ifdef HAS_YUY2TONVUVROW_AVX2 +ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15) +#endif + // Any 2 planes to 1 with yuvconstants #define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 3]); \ - memset(temp, 0, 128 * 2); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[128 * 2]); \ + SIMD_ALIGNED(uint8_t vout[128]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ + memcpy(vin, y_buf + n * SBPP, r * SBPP); \ + memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ SS(r, UVSHIFT) * SBPP2); \ - ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, 
MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \ + ANY_SIMD(vin, vin + 128, vout, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } // Biplanar to RGB. @@ -758,21 +826,21 @@ ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15) #undef ANY21C // Any 2 planes of 16 bit to 1 with yuvconstants -#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ - void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(T temp[16 * 3]); \ - SIMD_ALIGNED(uint8_t out[64]); \ - memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r * SBPP); \ - memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \ - ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ +#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ + void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(T vin[16 * 2]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(vin, y_buf + n, r * SBPP); \ + memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \ + ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_P210TOAR30ROW_SSSE3 @@ -806,21 +874,22 @@ ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15) #define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \ void NAMEANY(const T* src_u, const T* src_v, T* 
dst_uv, int depth, \ int width) { \ - SIMD_ALIGNED(T temp[16 * 4]); \ - memset(temp, 0, 16 * 4 * BPP); /* for msan */ \ + SIMD_ALIGNED(T vin[16 * 2]); \ + SIMD_ALIGNED(T vout[16]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_u, src_v, dst_uv, depth, n); \ } \ - memcpy(temp, src_u + n, r * BPP); \ - memcpy(temp + 16, src_v + n, r * BPP); \ - ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1); \ - memcpy(dst_uv + n * 2, temp + 32, r * BPP * 2); \ + memcpy(vin, src_u + n, r * BPP); \ + memcpy(vin + 16, src_v + n, r * BPP); \ + ANY_SIMD(vin, vin + 16, vout, depth, MASK + 1); \ + memcpy(dst_uv + n * 2, vout, r * BPP * 2); \ } #ifdef HAS_MERGEUVROW_16_AVX2 -ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15) +ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 7) #endif #ifdef HAS_MERGEUVROW_16_NEON ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7) @@ -829,18 +898,19 @@ ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7) #undef ANY21CT // Any 1 to 1. 
-#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 2]); \ - memset(temp, 0, 128); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t vin[128]); \ + SIMD_ALIGNED(uint8_t vout[128]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, n); \ + } \ + memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(vin, vout, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } #ifdef HAS_COPYROW_AVX @@ -931,6 +1001,13 @@ ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) #endif +#if defined(HAS_ARGBTORGB24ROW_LSX) +ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15) +ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15) +ANY11(ARGBToRGB565Row_Any_LSX, ARGBToRGB565Row_LSX, 0, 4, 2, 7) +ANY11(ARGBToARGB1555Row_Any_LSX, ARGBToARGB1555Row_LSX, 0, 4, 2, 7) +ANY11(ARGBToARGB4444Row_Any_LSX, ARGBToARGB4444Row_LSX, 0, 4, 2, 7) +#endif #if defined(HAS_ARGBTORGB24ROW_LASX) ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31) ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31) @@ -959,6 +1036,9 @@ ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31) #ifdef HAS_ARGBTOYJROW_AVX2 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31) #endif +#ifdef 
HAS_ABGRTOYJROW_AVX2 +ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31) +#endif #ifdef HAS_RGBATOYJROW_AVX2 ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31) #endif @@ -983,6 +1063,9 @@ ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15) #ifdef HAS_ARGBTOYJROW_SSSE3 ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15) #endif +#ifdef HAS_ABGRTOYJROW_SSSE3 +ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15) +#endif #ifdef HAS_RGBATOYJROW_SSSE3 ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15) #endif @@ -992,12 +1075,18 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15) #ifdef HAS_ARGBTOYROW_MSA ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) #endif +#ifdef HAS_ARGBTOYROW_LSX +ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15) +#endif #ifdef HAS_ARGBTOYROW_LASX ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31) #endif #ifdef HAS_ARGBTOYJROW_NEON ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15) #endif +#ifdef HAS_ABGRTOYJROW_NEON +ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15) +#endif #ifdef HAS_RGBATOYJROW_NEON ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15) #endif @@ -1007,9 +1096,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) #ifdef HAS_ARGBTOYJROW_LSX ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15) #endif +#ifdef HAS_RGBATOYJROW_LSX +ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15) +#endif +#ifdef HAS_ABGRTOYJROW_LSX +ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15) +#endif +#ifdef HAS_RGBATOYJROW_LASX +ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31) +#endif #ifdef HAS_ARGBTOYJROW_LASX ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31) #endif +#ifdef HAS_ABGRTOYJROW_LASX +ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31) +#endif #ifdef HAS_BGRATOYROW_NEON ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15) #endif @@ -1019,6 +1120,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 
1, 15) #ifdef HAS_BGRATOYROW_LSX ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15) #endif +#ifdef HAS_BGRATOYROW_LASX +ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31) +#endif #ifdef HAS_ABGRTOYROW_NEON ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15) #endif @@ -1028,6 +1132,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) #ifdef HAS_ABGRTOYROW_LSX ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15) #endif +#ifdef HAS_ABGRTOYROW_LASX +ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31) +#endif #ifdef HAS_RGBATOYROW_NEON ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15) #endif @@ -1037,6 +1144,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) #ifdef HAS_RGBATOYROW_LSX ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15) #endif +#ifdef HAS_RGBATOYROW_LASX +ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31) +#endif #ifdef HAS_RGB24TOYROW_NEON ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15) #endif @@ -1055,6 +1165,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) #ifdef HAS_RGB24TOYROW_LSX ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15) #endif +#ifdef HAS_RGB24TOYJROW_LSX +ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15) +#endif +#ifdef HAS_RGB24TOYJROW_LASX +ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31) +#endif #ifdef HAS_RGB24TOYROW_LASX ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31) #endif @@ -1079,6 +1195,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15) #ifdef HAS_RAWTOYROW_LASX ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31) #endif +#ifdef HAS_RAWTOYJROW_LSX +ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15) +#endif +#ifdef HAS_RAWTOYJROW_LASX +ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31) +#endif #ifdef HAS_RGB565TOYROW_NEON ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) #endif @@ -1115,12 +1237,18 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) #ifdef HAS_YUY2TOYROW_MSA 
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) #endif +#ifdef HAS_YUY2TOYROW_LSX +ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15) +#endif #ifdef HAS_YUY2TOYROW_LASX ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31) #endif #ifdef HAS_UYVYTOYROW_MSA ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) #endif +#ifdef HAS_UYVYTOYROW_LSX +ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15) +#endif #ifdef HAS_UYVYTOYROW_LASX ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31) #endif @@ -1217,6 +1345,9 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #ifdef HAS_ARGBATTENUATEROW_MSA ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) #endif +#ifdef HAS_ARGBATTENUATEROW_LSX +ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7) +#endif #ifdef HAS_ARGBATTENUATEROW_LASX ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15) #endif @@ -1238,19 +1369,21 @@ ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15) #undef ANY11 // Any 1 to 1 blended. Destination is read, modify, write. 
-#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8_t temp[64 * 2]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 64, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ +#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t vin[64]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + memset(vout, 0, sizeof(vout)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, n); \ + } \ + memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + memcpy(vout, dst_ptr + n * BPP, r * BPP); \ + ANY_SIMD(vin, vout, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } #ifdef HAS_ARGBCOPYALPHAROW_AVX2 @@ -1270,16 +1403,17 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) // Any 1 to 1 with parameter. 
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ - SIMD_ALIGNED(uint8_t temp[64 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[64]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, param, n); \ } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp, temp + 64, param, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ + memcpy(vin, src_ptr + n * SBPP, r * SBPP); \ + ANY_SIMD(vin, vout, param, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } #if defined(HAS_I400TOARGBROW_SSE2) @@ -1355,6 +1489,14 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA, 2, 7) #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LSX) +ANY11P(ARGBToRGB565DitherRow_Any_LSX, + ARGBToRGB565DitherRow_LSX, + const uint32_t, + 4, + 2, + 7) +#endif #if defined(HAS_ARGBTORGB565DITHERROW_LASX) ANY11P(ARGBToRGB565DitherRow_Any_LASX, ARGBToRGB565DitherRow_LASX, @@ -1375,6 +1517,9 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) #ifdef HAS_ARGBSHUFFLEROW_MSA ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) #endif +#ifdef HAS_ARGBSHUFFLEROW_LSX +ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7) +#endif #ifdef HAS_ARGBSHUFFLEROW_LASX ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15) #endif @@ -1384,17 +1529,17 @@ ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15) // Any 1 to 1 with type #define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \ void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \ - SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \ - memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[(MASK + 1) 
* SBPP]); \ + SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, n); \ } \ - memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \ - ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \ - memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \ + memcpy(vin, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \ + ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1); \ + memcpy((uint8_t*)(dst_ptr) + n * BPP, vout, r * BPP); \ } #ifdef HAS_ARGBTOAR64ROW_SSSE3 @@ -1450,17 +1595,17 @@ ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7) // Any 1 to 1 with parameter and shorts. BPP measures in shorts. #define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \ void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \ - SIMD_ALIGNED(STYPE temp[32]); \ - SIMD_ALIGNED(DTYPE out[32]); \ - memset(temp, 0, 32 * SBPP); /* for msan */ \ + SIMD_ALIGNED(STYPE vin[32]); \ + SIMD_ALIGNED(DTYPE vout[32]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, scale, n); \ } \ - memcpy(temp, src_ptr + n, r * SBPP); \ - ANY_SIMD(temp, out, scale, MASK + 1); \ - memcpy(dst_ptr + n, out, r * BPP); \ + memcpy(vin, src_ptr + n, r * SBPP); \ + ANY_SIMD(vin, vout, scale, MASK + 1); \ + memcpy(dst_ptr + n, vout, r * BPP); \ } #ifdef HAS_CONVERT16TO8ROW_SSSE3 @@ -1537,17 +1682,17 @@ ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15) // Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts. 
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \ void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \ - SIMD_ALIGNED(ST temp[32]); \ - SIMD_ALIGNED(T out[32]); \ - memset(temp, 0, SBPP * 32); /* for msan */ \ + SIMD_ALIGNED(ST vin[32]); \ + SIMD_ALIGNED(T vout[32]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, param, n); \ } \ - memcpy(temp, src_ptr + n, r * SBPP); \ - ANY_SIMD(temp, out, param, MASK + 1); \ - memcpy(dst_ptr + n, out, r * BPP); \ + memcpy(vin, src_ptr + n, r * SBPP); \ + ANY_SIMD(vin, vout, param, MASK + 1); \ + memcpy(dst_ptr + n, vout, r * BPP); \ } #ifdef HAS_HALFFLOATROW_SSE2 @@ -1588,20 +1733,22 @@ ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31) #undef ANY11P16 // Any 1 to 1 with yuvconstants -#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 2]); \ - memset(temp, 0, 128); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t vin[128]); \ + SIMD_ALIGNED(uint8_t vout[128]); \ + memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ + } \ + memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(vin, vout, 
yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } + #if defined(HAS_YUY2TOARGBROW_SSSE3) ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) @@ -1628,21 +1775,21 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) #define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \ void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \ int width, int source_y_fraction) { \ - SIMD_ALIGNED(TS temps[64 * 2]); \ - SIMD_ALIGNED(TD tempd[64]); \ - memset(temps, 0, sizeof(temps)); /* for msan */ \ + SIMD_ALIGNED(TS vin[64 * 2]); \ + SIMD_ALIGNED(TD vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ } \ - memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \ + memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \ if (source_y_fraction) { \ - memcpy(temps + 64, src_ptr + src_stride + n * SBPP, \ + memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \ r * SBPP * sizeof(TS)); \ } \ - ANY_SIMD(tempd, temps, 64, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD)); \ + ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \ } #ifdef HAS_INTERPOLATEROW_AVX2 @@ -1682,21 +1829,21 @@ ANY11I(InterpolateRow_16_Any_NEON, #define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \ void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \ int scale, int width, int source_y_fraction) { \ - SIMD_ALIGNED(TS temps[64 * 2]); \ - SIMD_ALIGNED(TD tempd[64]); \ - memset(temps, 0, sizeof(temps)); /* for msan */ \ + SIMD_ALIGNED(TS vin[64 * 2]); \ + SIMD_ALIGNED(TD vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(dst_ptr, src_ptr, src_stride, 
scale, n, source_y_fraction); \ } \ - memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \ + memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \ if (source_y_fraction) { \ - memcpy(temps + 64, src_ptr + src_stride + n * SBPP, \ + memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \ r * SBPP * sizeof(TS)); \ } \ - ANY_SIMD(tempd, temps, 64, scale, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD)); \ + ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \ } #ifdef HAS_INTERPOLATEROW_16TO8_NEON @@ -1721,18 +1868,19 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2, #undef ANY11IS // Any 1 to 1 mirror. -#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8_t temp[64 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr, r* BPP); \ - ANY_SIMD(temp, temp + 64, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \ +#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t vin[64]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ + } \ + memcpy(vin, src_ptr, r* BPP); \ + ANY_SIMD(vin, vout, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \ } #ifdef HAS_MIRRORROW_AVX2 @@ -1747,6 +1895,9 @@ ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31) #ifdef HAS_MIRRORROW_MSA ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) #endif +#ifdef HAS_MIRRORROW_LSX +ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31) +#endif #ifdef HAS_MIRRORROW_LASX 
ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63) #endif @@ -1762,6 +1913,9 @@ ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31) #ifdef HAS_MIRRORUVROW_MSA ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7) #endif +#ifdef HAS_MIRRORUVROW_LSX +ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7) +#endif #ifdef HAS_MIRRORUVROW_LASX ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15) #endif @@ -1777,6 +1931,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7) #ifdef HAS_ARGBMIRRORROW_MSA ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) #endif +#ifdef HAS_ARGBMIRRORROW_LSX +ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7) +#endif #ifdef HAS_ARGBMIRRORROW_LASX ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15) #endif @@ -1791,15 +1948,14 @@ ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15) // Any 1 plane. (memset) #define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \ - SIMD_ALIGNED(uint8_t temp[64]); \ - memset(temp, 0, 64); /* for msan */ \ + SIMD_ALIGNED(uint8_t vout[64]); \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(dst_ptr, v32, n); \ } \ - ANY_SIMD(temp, v32, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp, r * BPP); \ + ANY_SIMD(vout, v32, MASK + 1); \ + memcpy(dst_ptr + n * BPP, vout, r * BPP); \ } #ifdef HAS_SETROW_X86 @@ -1823,20 +1979,21 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3) #undef ANY1 // Any 1 to 2. Outputs UV planes. 
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ - int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 3]); \ - memset(temp, 0, 128); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_u, dst_v, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ - memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ - memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ +#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ + int width) { \ + SIMD_ALIGNED(uint8_t vin[128]); \ + SIMD_ALIGNED(uint8_t vout[128 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_u, dst_v, n); \ + } \ + memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + ANY_SIMD(vin, vout, vout + 128, MASK + 1); \ + memcpy(dst_u + (n >> DUVSHIFT), vout, SS(r, DUVSHIFT)); \ + memcpy(dst_v + (n >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT)); \ } #ifdef HAS_SPLITUVROW_SSE2 @@ -1875,6 +2032,11 @@ ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15) ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) #endif +#ifdef HAS_YUY2TOUV422ROW_LSX +ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15) +ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15) +ANY12(UYVYToUV422Row_Any_LSX, UYVYToUV422Row_LSX, 1, 4, 1, 15) +#endif #ifdef HAS_YUY2TOUV422ROW_LASX ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31) ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31) @@ -1885,17 +2047,18 @@ ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31) // Any 2 16 bit 
planes with parameter to 1 #define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \ void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \ - SIMD_ALIGNED(T temp[16 * 4]); \ - memset(temp, 0, 16 * 4 * BPP); /* for msan */ \ + SIMD_ALIGNED(T vin[16 * 2]); \ + SIMD_ALIGNED(T vout[16 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \ } \ - memcpy(temp, src_uv + n * 2, r * BPP * 2); \ - ANY_SIMD(temp, temp + 32, temp + 48, depth, MASK + 1); \ - memcpy(dst_u + n, temp + 32, r * BPP); \ - memcpy(dst_v + n, temp + 48, r * BPP); \ + memcpy(vin, src_uv + n * 2, r * BPP * 2); \ + ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1); \ + memcpy(dst_u + n, vout, r * BPP); \ + memcpy(dst_v + n, vout + 16, r * BPP); \ } #ifdef HAS_SPLITUVROW_16_AVX2 @@ -1909,21 +2072,22 @@ ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7) #undef ANY21CT // Any 1 to 3. Outputs RGB planes. 
-#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ - uint8_t* dst_b, int width) { \ - SIMD_ALIGNED(uint8_t temp[16 * 6]); \ - memset(temp, 0, 16 * 3); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ - } \ - memcpy(temp, src_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \ - memcpy(dst_r + n, temp + 16 * 3, r); \ - memcpy(dst_g + n, temp + 16 * 4, r); \ - memcpy(dst_b + n, temp + 16 * 5, r); \ +#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ + uint8_t* dst_b, int width) { \ + SIMD_ALIGNED(uint8_t vin[16 * 3]); \ + SIMD_ALIGNED(uint8_t vout[16 * 3]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ + } \ + memcpy(vin, src_ptr + n * BPP, r * BPP); \ + ANY_SIMD(vin, vout, vout + 16, vout + 32, MASK + 1); \ + memcpy(dst_r + n, vout, r); \ + memcpy(dst_g + n, vout + 16, r); \ + memcpy(dst_b + n, vout + 32, r); \ } #ifdef HAS_SPLITRGBROW_SSSE3 @@ -1946,23 +2110,23 @@ ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15) #endif // Any 1 to 4. Outputs ARGB planes. 
-#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ - uint8_t* dst_b, uint8_t* dst_a, int width) { \ - SIMD_ALIGNED(uint8_t temp[16 * 8]); \ - memset(temp, 0, 16 * 4); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \ - } \ - memcpy(temp, src_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, temp + 16 * 7, \ - MASK + 1); \ - memcpy(dst_r + n, temp + 16 * 4, r); \ - memcpy(dst_g + n, temp + 16 * 5, r); \ - memcpy(dst_b + n, temp + 16 * 6, r); \ - memcpy(dst_a + n, temp + 16 * 7, r); \ +#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ + uint8_t* dst_b, uint8_t* dst_a, int width) { \ + SIMD_ALIGNED(uint8_t vin[16 * 4]); \ + SIMD_ALIGNED(uint8_t vout[16 * 4]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \ + } \ + memcpy(vin, src_ptr + n * BPP, r * BPP); \ + ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1); \ + memcpy(dst_r + n, vout, r); \ + memcpy(dst_g + n, vout + 16, r); \ + memcpy(dst_b + n, vout + 32, r); \ + memcpy(dst_a + n, vout + 48, r); \ } #ifdef HAS_SPLITARGBROW_SSE2 @@ -1983,25 +2147,26 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15) #define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \ uint8_t* dst_v, int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 4]); \ - memset(temp, 0, 128 * 2); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[128 * 2]); \ + SIMD_ALIGNED(uint8_t vout[128 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \ } \ - memcpy(temp, src_ptr + (n 
>> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \ + memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \ SS(r, UVSHIFT) * BPP); \ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ - memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \ + memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \ BPP); \ - memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ - temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \ + vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ } \ - ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ - memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ - memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \ + ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \ + memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \ + memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \ } #ifdef HAS_ARGBTOUVROW_AVX2 @@ -2013,9 +2178,17 @@ ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31) #ifdef HAS_ARGBTOUVJROW_AVX2 ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31) #endif +#ifdef HAS_ABGRTOUVJROW_AVX2 +ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31) +#endif +#ifdef HAS_ARGBTOUVJROW_SSSE3 +ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) +#endif +#ifdef HAS_ABGRTOUVJROW_SSSE3 +ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15) +#endif #ifdef HAS_ARGBTOUVROW_SSSE3 ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) -ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15) ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15) ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15) @@ -2034,12 +2207,18 @@ ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15) #ifdef HAS_ARGBTOUVROW_MSA ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 
0, 4, 31) #endif +#ifdef HAS_ARGBTOUVROW_LSX +ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15) +#endif #ifdef HAS_ARGBTOUVROW_LASX ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31) #endif #ifdef HAS_ARGBTOUVJROW_NEON ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) #endif +#ifdef HAS_ABGRTOUVJROW_NEON +ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15) +#endif #ifdef HAS_ARGBTOUVJROW_MSA ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) #endif @@ -2142,12 +2321,18 @@ ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) #ifdef HAS_YUY2TOUVROW_MSA ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) #endif +#ifdef HAS_YUY2TOUVROW_LSX +ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15) +#endif #ifdef HAS_YUY2TOUVROW_LASX ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31) #endif #ifdef HAS_UYVYTOUVROW_MSA ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) #endif +#ifdef HAS_UYVYTOUVROW_LSX +ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15) +#endif #ifdef HAS_UYVYTOUVROW_LASX ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31) #endif @@ -2158,24 +2343,25 @@ ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31) #define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \ int width) { \ - SIMD_ALIGNED(uint8_t temp[128 * 3]); \ - memset(temp, 0, 128 * 2); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[128 * 2]); \ + SIMD_ALIGNED(uint8_t vout[128]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, src_stride, dst_vu, n); \ } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \ + memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \ SS(r, UVSHIFT) * BPP); \ if ((width & 1) && UVSHIFT == 0) { 
/* repeat last pixel for subsample */ \ - memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \ + memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \ BPP); \ - memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ - temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \ + vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ } \ - ANY_SIMD(temp, 128, temp + 256, MASK + 1); \ - memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2); \ + ANY_SIMD(vin, 128, vout, MASK + 1); \ + memcpy(dst_vu + (n >> 1) * 2, vout, SS(r, 1) * 2); \ } #ifdef HAS_AYUVTOVUROW_NEON @@ -2184,42 +2370,53 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15) #endif #undef ANY11S -#define ANYDETILE(NAMEANY, ANY_SIMD, MASK) \ - void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, \ - int width) { \ - SIMD_ALIGNED(uint8_t temp[16 * 2]); \ - memset(temp, 0, 16); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src, src_tile_stride, dst, n); \ - } \ - memcpy(temp, src + (n / 16) * src_tile_stride, r); \ - ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \ - memcpy(dst + n, temp + 16, r); \ +#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK) \ + void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \ + SIMD_ALIGNED(T vin[16]); \ + SIMD_ALIGNED(T vout[16]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src, src_tile_stride, dst, n); \ + } \ + memcpy(vin, src + (n / 16) * src_tile_stride, r * BPP); \ + ANY_SIMD(vin, src_tile_stride, vout, MASK + 1); \ + memcpy(dst + n, vout, r * BPP); \ } #ifdef HAS_DETILEROW_NEON -ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15) +ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15) #endif #ifdef HAS_DETILEROW_SSE2 -ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15) +ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 
uint8_t, 1, 15) +#endif +#ifdef HAS_DETILEROW_16_NEON +ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15) +#endif +#ifdef HAS_DETILEROW_16_SSE2 +ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15) +#endif +#ifdef HAS_DETILEROW_16_AVX +ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15) #endif +// DetileSplitUVRow width is in bytes #define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \ void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride, \ uint8_t* dst_u, uint8_t* dst_v, int width) { \ - SIMD_ALIGNED(uint8_t temp[16 * 2]); \ - memset(temp, 0, 16 * 2); /* for msan */ \ + SIMD_ALIGNED(uint8_t vin[16]); \ + SIMD_ALIGNED(uint8_t vout[8 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n); \ } \ - memcpy(temp, src_uv + (n / 16) * src_tile_stride, r); \ - ANY_SIMD(temp, src_tile_stride, temp + 16, temp + 24, r); \ - memcpy(dst_u + n / 2, temp + 16, (r + 1) / 2); \ - memcpy(dst_v + n / 2, temp + 24, (r + 1) / 2); \ + memcpy(vin, src_uv + (n / 16) * src_tile_stride, r); \ + ANY_SIMD(vin, src_tile_stride, vout, vout + 8, r); \ + memcpy(dst_u + n / 2, vout, (r + 1) / 2); \ + memcpy(dst_v + n / 2, vout + 8, (r + 1) / 2); \ } #ifdef HAS_DETILESPLITUVROW_NEON @@ -2229,6 +2426,33 @@ ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15) ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15) #endif +#define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK) \ + void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, \ + const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, \ + uint8_t* dst_yuy2, int width) { \ + SIMD_ALIGNED(uint8_t vin[16 * 2]); \ + SIMD_ALIGNED(uint8_t vout[16 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \ 
+ n); \ + } \ + memcpy(vin, src_y + (n / 16) * src_y_tile_stride, r); \ + memcpy(vin + 16, src_uv + (n / 16) * src_uv_tile_stride, r); \ + ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout, r); \ + memcpy(dst_yuy2 + 2 * n, vout, 2 * r); \ + } + +#ifdef HAS_DETILETOYUY2_NEON +ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15) +#endif + +#ifdef HAS_DETILETOYUY2_SSE2 +ANYDETILEMERGE(DetileToYUY2_Any_SSE2, DetileToYUY2_SSE2, 15) +#endif + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/source/row_common.cc b/source/row_common.cc index 83442496..3afc4b4d 100644 --- a/files/source/row_common.cc +++ b/source/row_common.cc @@ -21,6 +21,12 @@ namespace libyuv { extern "C" { #endif +#ifdef __cplusplus +#define STATIC_CAST(type, expr) static_cast<type>(expr) +#else +#define STATIC_CAST(type, expr) (type)(expr) +#endif + // This macro controls YUV to RGB using unsigned math to extend range of // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B: // LIBYUV_UNLIMITED_DATA @@ -42,7 +48,6 @@ extern "C" { defined(__i386__) || defined(_M_IX86)) #define LIBYUV_ARGBTOUV_PAVGB 1 #define LIBYUV_RGBTOU_TRUNCATE 1 -#define LIBYUV_ATTENUATE_DUP 1 #endif #if defined(LIBYUV_BIT_EXACT) #define LIBYUV_UNATTENUATE_DUP 1 @@ -182,12 +187,13 @@ void RGB565ToARGBRow_C(const uint8_t* src_rgb565, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_rgb565[0] & 0x1f; - uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r = src_rgb565[1] >> 3; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 2) | (g >> 4); - dst_argb[2] = (r << 3) | (r >> 2); + uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4)); + 
dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_argb[3] = 255u; dst_argb += 4; src_rgb565 += 2; @@ -199,13 +205,14 @@ void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_argb1555[0] & 0x1f; - uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r = (src_argb1555[1] & 0x7c) >> 2; - uint8_t a = src_argb1555[1] >> 7; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 3) | (g >> 2); - dst_argb[2] = (r << 3) | (r >> 2); + uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2)); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_argb[3] = -a; dst_argb += 4; src_argb1555 += 2; @@ -217,14 +224,14 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, int width) { int x; for (x = 0; x < width; ++x) { - uint8_t b = src_argb4444[0] & 0x0f; - uint8_t g = src_argb4444[0] >> 4; - uint8_t r = src_argb4444[1] & 0x0f; - uint8_t a = src_argb4444[1] >> 4; - dst_argb[0] = (b << 4) | b; - dst_argb[1] = (g << 4) | g; - dst_argb[2] = (r << 4) | r; - dst_argb[3] = (a << 4) | a; + uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f); + uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4); + uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f); + uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4); + dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b); + dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g); + dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r); + dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a); dst_argb += 4; src_argb4444 += 2; } @@ -274,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* 
dst_ab30, int width) { } } +void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_abgr[0] = r; + dst_abgr[1] = g; + dst_abgr[2] = b; + dst_abgr[3] = a; + dst_abgr += 4; + src_argb += 4; + } +} + +void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_bgra[0] = a; + dst_bgra[1] = r; + dst_bgra[2] = g; + dst_bgra[3] = b; + dst_bgra += 4; + src_argb += 4; + } +} + +void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; + uint8_t a = src_argb[3]; + dst_rgba[0] = a; + dst_rgba[1] = b; + dst_rgba[2] = g; + dst_rgba[3] = r; + dst_rgba += 4; + src_argb += 4; + } +} + void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { @@ -302,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { } } +void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t a = src_rgba[0]; + uint8_t b = src_rgba[1]; + uint8_t g = src_rgba[2]; + uint8_t r = src_rgba[3]; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; + dst_argb += 4; + src_rgba += 4; + } +} + void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -320,7 +391,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t b0 = src_argb[0] >> 3; uint8_t g0 = src_argb[1] >> 2; uint8_t r0 = src_argb[2] >> 3; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 
11); + *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -334,29 +405,31 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { // or the upper byte for big endian. void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { int x; for (x = 0; x < width - 1; x += 2) { int dither0 = ((const unsigned char*)(&dither4))[x & 3]; int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; - uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; - uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3; - uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2; - uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11); + uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3); + uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2); + uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3); + uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3); + uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2); + uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11)); dst_rgb += 4; src_argb += 8; } if (width & 1) { int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; - uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3); + uint8_t g0 = 
STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2); + uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3); + *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -371,8 +444,10 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g1 = src_argb[5] >> 3; uint8_t r1 = src_argb[6] >> 3; uint8_t a1 = src_argb[7] >> 7; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15)); dst_rgb += 4; src_argb += 8; } @@ -381,7 +456,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g0 = src_argb[1] >> 3; uint8_t r0 = src_argb[2] >> 3; uint8_t a0 = src_argb[3] >> 7; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); + *(uint16_t*)(dst_rgb) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15)); } } @@ -396,8 +472,10 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g1 = src_argb[5] >> 4; uint8_t r1 = src_argb[6] >> 4; uint8_t a1 = src_argb[7] >> 4; - *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); - *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12); + *(uint16_t*)(dst_rgb + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12)); + *(uint16_t*)(dst_rgb + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12)); dst_rgb += 4; src_argb += 8; } @@ -406,18 +484,20 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { uint8_t g0 = src_argb[1] >> 4; uint8_t r0 = src_argb[2] >> 4; uint8_t a0 = src_argb[3] >> 4; - *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); + *(uint16_t*)(dst_rgb) = + 
STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12)); } } void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) { int x; for (x = 0; x < width; ++x) { - uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); + uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2); - uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); + uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); uint32_t a0 = (src_abgr[3] >> 6); - *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30); + *(uint32_t*)(dst_ar30) = + STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30)); dst_ar30 += 4; src_abgr += 4; } @@ -430,7 +510,8 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2); uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2); uint32_t a0 = (src_argb[3] >> 6); - *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); + *(uint32_t*)(dst_ar30) = + STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30)); dst_ar30 += 4; src_argb += 4; } @@ -439,10 +520,14 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { int x; for (x = 0; x < width; ++x) { - dst_ar64[0] = src_argb[0] * 0x0101; - dst_ar64[1] = src_argb[1] * 0x0101; - dst_ar64[2] = src_argb[2] * 0x0101; - dst_ar64[3] = src_argb[3] * 0x0101; + uint16_t b = src_argb[0] * 0x0101; + uint16_t g = src_argb[1] * 0x0101; + uint16_t r = src_argb[2] * 0x0101; + uint16_t a = src_argb[3] * 0x0101; + dst_ar64[0] = b; + dst_ar64[1] = g; + dst_ar64[2] = r; + dst_ar64[3] = a; dst_ar64 += 4; src_argb += 4; } @@ -451,10 +536,14 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { void ARGBToAB64Row_C(const uint8_t* src_argb, 
uint16_t* dst_ab64, int width) { int x; for (x = 0; x < width; ++x) { - dst_ab64[0] = src_argb[2] * 0x0101; - dst_ab64[1] = src_argb[1] * 0x0101; - dst_ab64[2] = src_argb[0] * 0x0101; - dst_ab64[3] = src_argb[3] * 0x0101; + uint16_t b = src_argb[0] * 0x0101; + uint16_t g = src_argb[1] * 0x0101; + uint16_t r = src_argb[2] * 0x0101; + uint16_t a = src_argb[3] * 0x0101; + dst_ab64[0] = r; + dst_ab64[1] = g; + dst_ab64[2] = b; + dst_ab64[3] = a; dst_ab64 += 4; src_argb += 4; } @@ -463,10 +552,14 @@ void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - dst_argb[0] = src_ar64[0] >> 8; - dst_argb[1] = src_ar64[1] >> 8; - dst_argb[2] = src_ar64[2] >> 8; - dst_argb[3] = src_ar64[3] >> 8; + uint8_t b = src_ar64[0] >> 8; + uint8_t g = src_ar64[1] >> 8; + uint8_t r = src_ar64[2] >> 8; + uint8_t a = src_ar64[3] >> 8; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; dst_argb += 4; src_ar64 += 4; } @@ -475,15 +568,35 @@ void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - dst_argb[0] = src_ab64[2] >> 8; - dst_argb[1] = src_ab64[1] >> 8; - dst_argb[2] = src_ab64[0] >> 8; - dst_argb[3] = src_ab64[3] >> 8; + uint8_t r = src_ab64[0] >> 8; + uint8_t g = src_ab64[1] >> 8; + uint8_t b = src_ab64[2] >> 8; + uint8_t a = src_ab64[3] >> 8; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; dst_argb += 4; src_ab64 += 4; } } +void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) { + int x; + for (x = 0; x < width; ++x) { + uint16_t b = src_ar64[0]; + uint16_t g = src_ar64[1]; + uint16_t r = src_ar64[2]; + uint16_t a = src_ar64[3]; + dst_ab64[0] = r; + dst_ab64[1] = g; + dst_ab64[2] = b; + dst_ab64[3] = a; + dst_ab64 += 4; + src_ar64 += 
4; + } +} + // TODO(fbarchard): Make shuffle compatible with SIMD versions void AR64ShuffleRow_C(const uint8_t* src_ar64, uint8_t* dst_ar64, @@ -514,8 +627,8 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64, #ifdef LIBYUV_RGB7 // Old 7 bit math for compatibility on unsupported platforms. -static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { - return ((33 * r + 65 * g + 13 * b) >> 7) + 16; +static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16); } #else // 8 bit @@ -524,8 +637,8 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { // return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) + // 0x7e80) >> 8; -static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { - return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; +static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8); } #endif @@ -533,29 +646,31 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { // LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round. 
#ifdef LIBYUV_RGBTOU_TRUNCATE -static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { - return (112 * b - 74 * g - 38 * r + 0x8000) >> 8; +static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8); } -static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { - return (112 * r - 94 * g - 18 * b + 0x8000) >> 8; +static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8); } #else // TODO(fbarchard): Add rounding to x86 SIMD and use this -static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { - return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; +static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8); } -static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { - return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; +static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) { + return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8); } #endif // LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb. #if !defined(LIBYUV_ARGBTOUV_PAVGB) static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) { - return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8; + return STATIC_CAST( + uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8); } static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) { - return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8; + return STATIC_CAST( + uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8); } #endif @@ -674,28 +789,28 @@ MAKEROWY(RAW, 0, 1, 2, 3) #ifdef LIBYUV_RGB7 // Old 7 bit math for compatibility on unsupported platforms. 
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (38 * r + 75 * g + 15 * b + 64) >> 7; } #else // 8 bit -static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (77 * r + 150 * g + 29 * b + 128) >> 8; } #endif #if defined(LIBYUV_ARGBTOUV_PAVGB) -static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; } -static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { +static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; } #else -static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) { +static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) { return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8; } -static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { +static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8; } #endif @@ -782,6 +897,7 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) { #endif MAKEROWYJ(ARGB, 2, 1, 0, 4) +MAKEROWYJ(ABGR, 0, 1, 2, 4) MAKEROWYJ(RGBA, 3, 2, 1, 4) MAKEROWYJ(RGB24, 2, 1, 0, 3) MAKEROWYJ(RAW, 0, 1, 2, 3) @@ -791,11 +907,12 @@ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_rgb565[0] & 0x1f; - uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t g = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); uint8_t r = src_rgb565[1] >> 3; - b = (b << 3) | (b >> 2); - g = (g << 2) | (g >> 4); - r = (r << 3) | (r >> 2); + b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + g = STATIC_CAST(uint8_t, (g << 2) | (g >> 
4)); + r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_y[0] = RGBToY(r, g, b); src_rgb565 += 2; dst_y += 1; @@ -806,11 +923,12 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb1555[0] & 0x1f; - uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t g = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); uint8_t r = (src_argb1555[1] & 0x7c) >> 2; - b = (b << 3) | (b >> 2); - g = (g << 3) | (g >> 2); - r = (r << 3) | (r >> 2); + b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2)); + g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2)); + r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2)); dst_y[0] = RGBToY(r, g, b); src_argb1555 += 2; dst_y += 1; @@ -823,9 +941,9 @@ void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { uint8_t b = src_argb4444[0] & 0x0f; uint8_t g = src_argb4444[0] >> 4; uint8_t r = src_argb4444[1] & 0x0f; - b = (b << 4) | b; - g = (g << 4) | g; - r = (r << 4) | r; + b = STATIC_CAST(uint8_t, (b << 4) | b); + g = STATIC_CAST(uint8_t, (g << 4) | g); + r = STATIC_CAST(uint8_t, (r << 4) | r); dst_y[0] = RGBToY(r, g, b); src_argb4444 += 2; dst_y += 1; @@ -840,31 +958,35 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565, const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = src_rgb565[0] & 0x1f; - uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r0 = src_rgb565[1] >> 3; - uint8_t b1 = src_rgb565[2] & 0x1f; - uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); - uint8_t r1 = src_rgb565[3] >> 3; - uint8_t b2 = next_rgb565[0] & 0x1f; - uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8_t r2 = next_rgb565[1] >> 3; - uint8_t b3 = next_rgb565[2] & 0x1f; - uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); - uint8_t r3 = next_rgb565[3] >> 3; - - b0 = (b0 << 
3) | (b0 >> 2); - g0 = (g0 << 2) | (g0 >> 4); - r0 = (r0 << 3) | (r0 >> 2); - b1 = (b1 << 3) | (b1 >> 2); - g1 = (g1 << 2) | (g1 >> 4); - r1 = (r1 << 3) | (r1 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 2) | (g2 >> 4); - r2 = (r2 << 3) | (r2 >> 2); - b3 = (b3 << 3) | (b3 >> 2); - g3 = (g3 << 2) | (g3 >> 4); - r3 = (r3 << 3) | (r3 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f); + uint8_t g1 = STATIC_CAST( + uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3)); + uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3); + uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3); + uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f); + uint8_t g3 = STATIC_CAST( + uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3)); + uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2)); + g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4)); + r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); + b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2)); + g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4)); + r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -886,19 +1008,20 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565, dst_v += 1; } if (width & 1) { - 
uint8_t b0 = src_rgb565[0] & 0x1f; - uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8_t r0 = src_rgb565[1] >> 3; - uint8_t b2 = next_rgb565[0] & 0x1f; - uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8_t r2 = next_rgb565[1] >> 3; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 2) | (g0 >> 4); - r0 = (r0 << 3) | (r0 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 2) | (g2 >> 4); - r2 = (r2 << 3) | (r2 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3); + uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3); + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -924,31 +1047,35 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = src_argb1555[0] & 0x1f; - uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8_t b1 = src_argb1555[2] & 0x1f; - uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); - uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2; - uint8_t b2 = next_argb1555[0] & 0x1f; - uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; - uint8_t b3 = next_argb1555[2] & 0x1f; - uint8_t g3 = (next_argb1555[2] >> 5) | 
((next_argb1555[3] & 0x03) << 3); - uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 3) | (g0 >> 2); - r0 = (r0 << 3) | (r0 >> 2); - b1 = (b1 << 3) | (b1 >> 2); - g1 = (g1 << 3) | (g1 >> 2); - r1 = (r1 << 3) | (r1 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - r2 = (r2 << 3) | (r2 >> 2); - b3 = (b3 << 3) | (b3 >> 2); - g3 = (g3 << 3) | (g3 >> 2); - r3 = (r3 << 3) | (r3 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f); + uint8_t g1 = STATIC_CAST( + uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3)); + uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2); + uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2); + uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f); + uint8_t g3 = STATIC_CAST( + uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3)); + uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2)); + g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2)); + r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); + b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2)); + g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2)); + r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2)); #if 
LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -970,19 +1097,21 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, dst_v += 1; } if (width & 1) { - uint8_t b0 = src_argb1555[0] & 0x1f; - uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8_t b2 = next_argb1555[0] & 0x1f; - uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; - - b0 = (b0 << 3) | (b0 >> 2); - g0 = (g0 << 3) | (g0 >> 2); - r0 = (r0 << 3) | (r0 >> 2); - b2 = (b2 << 3) | (b2 >> 2); - g2 = (g2 << 3) | (g2 >> 2); - r2 = (r2 << 3) | (r2 >> 2); + uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f); + uint8_t g0 = STATIC_CAST( + uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)); + uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2); + uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f); + uint8_t g2 = STATIC_CAST( + uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3)); + uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2); + + b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2)); + g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2)); + r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2)); + b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2)); + g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2)); + r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2)); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -1021,18 +1150,18 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, uint8_t g3 = next_argb4444[2] >> 4; uint8_t r3 = next_argb4444[3] & 0x0f; - b0 = (b0 << 4) | b0; - g0 = (g0 << 4) | g0; - r0 = (r0 << 4) | r0; - b1 = (b1 << 4) | b1; - g1 = (g1 << 4) | g1; - r1 = (r1 << 4) | r1; - b2 = (b2 << 4) | b2; - g2 = (g2 << 4) | g2; - r2 = (r2 << 4) | r2; - b3 = (b3 << 4) | b3; - g3 = (g3 << 4) | g3; - r3 = (r3 << 4) | r3; + b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0); + g0 = 
STATIC_CAST(uint8_t, (g0 << 4) | g0); + r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0); + b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1); + g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1); + r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1); + b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2); + g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2); + r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2); + b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3); + g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3); + r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3)); @@ -1061,12 +1190,12 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, uint8_t g2 = next_argb4444[0] >> 4; uint8_t r2 = next_argb4444[1] & 0x0f; - b0 = (b0 << 4) | b0; - g0 = (g0 << 4) | g0; - r0 = (r0 << 4) | r0; - b2 = (b2 << 4) | b2; - g2 = (g2 << 4) | g2; - r2 = (r2 << 4) | r2; + b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0); + g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0); + r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0); + b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2); + g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2); + r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2); #if LIBYUV_ARGBTOUV_PAVGB uint8_t ab = AVGB(b0, b2); @@ -1123,9 +1252,9 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width) { int sg = (b * 22 + g * 88 + r * 45) >> 7; int sr = (b * 24 + g * 98 + r * 50) >> 7; // b does not over flow. a is preserved from original. 
- dst_argb[0] = sb; - dst_argb[1] = clamp255(sg); - dst_argb[2] = clamp255(sr); + dst_argb[0] = STATIC_CAST(uint8_t, sb); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr)); dst_argb += 4; } } @@ -1154,10 +1283,10 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb, int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] + a * matrix_argb[15]) >> 6; - dst_argb[0] = Clamp(sb); - dst_argb[1] = Clamp(sg); - dst_argb[2] = Clamp(sr); - dst_argb[3] = Clamp(sa); + dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb)); + dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg)); + dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr)); + dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa)); src_argb += 4; dst_argb += 4; } @@ -1207,9 +1336,12 @@ void ARGBQuantizeRow_C(uint8_t* dst_argb, int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; - dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; - dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; - dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; + dst_argb[0] = STATIC_CAST( + uint8_t, (b * scale >> 16) * interval_size + interval_offset); + dst_argb[1] = STATIC_CAST( + uint8_t, (g * scale >> 16) * interval_size + interval_offset); + dst_argb[2] = STATIC_CAST( + uint8_t, (r * scale >> 16) * interval_size + interval_offset); dst_argb += 4; } } @@ -1260,10 +1392,10 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb, const uint32_t g_scale = src_argb1[1]; const uint32_t r_scale = src_argb1[2]; const uint32_t a_scale = src_argb1[3]; - dst_argb[0] = SHADE(b, b_scale); - dst_argb[1] = SHADE(g, g_scale); - dst_argb[2] = SHADE(r, r_scale); - dst_argb[3] = SHADE(a, a_scale); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale)); src_argb += 4; src_argb1 += 4; dst_argb += 4; 
@@ -1288,10 +1420,10 @@ void ARGBAddRow_C(const uint8_t* src_argb, const int g_add = src_argb1[1]; const int r_add = src_argb1[2]; const int a_add = src_argb1[3]; - dst_argb[0] = SHADE(b, b_add); - dst_argb[1] = SHADE(g, g_add); - dst_argb[2] = SHADE(r, r_add); - dst_argb[3] = SHADE(a, a_add); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1315,10 +1447,10 @@ void ARGBSubtractRow_C(const uint8_t* src_argb, const int g_sub = src_argb1[1]; const int r_sub = src_argb1[2]; const int a_sub = src_argb1[3]; - dst_argb[0] = SHADE(b, b_sub); - dst_argb[1] = SHADE(g, g_sub); - dst_argb[2] = SHADE(r, r_sub); - dst_argb[3] = SHADE(a, a_sub); + dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub)); + dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub)); + dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub)); + dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub)); src_argb += 4; src_argb1 += 4; dst_argb += 4; @@ -1431,7 +1563,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { // clang-format off -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) // Bias values include subtract 128 from U and V, bias from Y and rounding. // For B and R bias is negative. For G bias is positive. 
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \ @@ -1627,7 +1759,7 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR) #undef MAKEYUVCONSTANTS -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) #define LOAD_YUV_CONSTANTS \ int ub = yuvconstants->kUVCoeff[0]; \ int vr = yuvconstants->kUVCoeff[1]; \ @@ -1675,9 +1807,9 @@ static __inline void YuvPixel(uint8_t y, LOAD_YUV_CONSTANTS; uint32_t y32 = y * 0x0101; CALC_RGB16; - *b = Clamp((int32_t)(b16) >> 6); - *g = Clamp((int32_t)(g16) >> 6); - *r = Clamp((int32_t)(r16) >> 6); + *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6)); } // Reads 8 bit YUV and leaves result as 16 bit. @@ -1706,9 +1838,9 @@ static __inline void YuvPixel10_16(uint16_t y, int* r, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; - uint32_t y32 = y << 6; - u = clamp255(u >> 2); - v = clamp255(v >> 2); + uint32_t y32 = (y << 6) | (y >> 4); + u = STATIC_CAST(uint8_t, clamp255(u >> 2)); + v = STATIC_CAST(uint8_t, clamp255(v >> 2)); CALC_RGB16; *b = b16; *g = g16; @@ -1725,9 +1857,9 @@ static __inline void YuvPixel12_16(int16_t y, int* r, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; - uint32_t y32 = y << 4; - u = clamp255(u >> 4); - v = clamp255(v >> 4); + uint32_t y32 = (y << 4) | (y >> 8); + u = STATIC_CAST(uint8_t, clamp255(u >> 4)); + v = STATIC_CAST(uint8_t, clamp255(v >> 4)); CALC_RGB16; *b = b16; *g = g16; @@ -1747,9 +1879,9 @@ static __inline void YuvPixel10(uint16_t y, int g16; int r16; YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants); - *b = Clamp(b16 >> 6); - *g = Clamp(g16 >> 6); - *r = Clamp(r16 >> 6); + *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6)); } // C reference code that mimics the YUV 12 bit assembly. 
@@ -1765,9 +1897,9 @@ static __inline void YuvPixel12(uint16_t y, int g16; int r16; YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants); - *b = Clamp(b16 >> 6); - *g = Clamp(g16 >> 6); - *r = Clamp(r16 >> 6); + *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6)); + *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6)); + *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6)); } // C reference code that mimics the YUV 16 bit assembly. @@ -1781,12 +1913,12 @@ static __inline void YuvPixel16_8(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = y; - u = clamp255(u >> 8); - v = clamp255(v >> 8); + u = STATIC_CAST(uint16_t, clamp255(u >> 8)); + v = STATIC_CAST(uint16_t, clamp255(v >> 8)); CALC_RGB16; - *b = Clamp((int32_t)(b16) >> 6); - *g = Clamp((int32_t)(g16) >> 6); - *r = Clamp((int32_t)(r16) >> 6); + *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6)); + *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6)); + *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6)); } // C reference code that mimics the YUV 16 bit assembly. 
@@ -1800,8 +1932,8 @@ static __inline void YuvPixel16_16(uint16_t y, const struct YuvConstants* yuvconstants) { LOAD_YUV_CONSTANTS; uint32_t y32 = y; - u = clamp255(u >> 8); - v = clamp255(v >> 8); + u = STATIC_CAST(uint16_t, clamp255(u >> 8)); + v = STATIC_CAST(uint16_t, clamp255(v >> 8)); CALC_RGB16; *b = b16; *g = g16; @@ -1815,7 +1947,7 @@ static __inline void YPixel(uint8_t y, uint8_t* g, uint8_t* r, const struct YuvConstants* yuvconstants) { -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(__riscv) int yg = yuvconstants->kRGBCoeffBias[0]; int ygb = yuvconstants->kRGBCoeffBias[4]; #else @@ -1823,9 +1955,10 @@ static __inline void YPixel(uint8_t y, int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; - *b = Clamp(((int32_t)(y1) + ygb) >> 6); - *g = Clamp(((int32_t)(y1) + ygb) >> 6); - *r = Clamp(((int32_t)(y1) + ygb) >> 6); + uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6)); + *b = b8; + *g = b8; + *r = b8; } void I444ToARGBRow_C(const uint8_t* src_y, @@ -1846,6 +1979,23 @@ void I444ToARGBRow_C(const uint8_t* src_y, } } +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width; ++x) { + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + src_y += 1; + src_u += 1; + src_v += 1; + rgb_buf += 3; // Advance 1 pixel. 
+ } +} + // Also used for 420 void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, @@ -1929,10 +2079,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y, for (x = 0; x < width - 1; x += 2) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = clamp255(src_a[1] >> 2); + rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2)); src_y += 2; src_u += 1; src_v += 1; @@ -1942,7 +2092,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y, if (width & 1) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); } } @@ -1957,7 +2107,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y, for (x = 0; x < width; ++x) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = clamp255(src_a[0] >> 2); + rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2)); src_y += 1; src_u += 1; src_v += 1; @@ -2283,8 +2433,10 @@ void I422ToARGB4444Row_C(const uint8_t* src_y, b1 = b1 >> 4; g1 = g1 >> 4; r1 = r1 >> 4; - *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; - *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000); + *(uint16_t*)(dst_argb4444 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000); src_y += 2; src_u += 1; src_v += 1; @@ -2295,7 +2447,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y, b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; - *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; + *(uint16_t*)(dst_argb4444) = + STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 
<< 8) | 0xf000); } } @@ -2321,8 +2474,10 @@ void I422ToARGB1555Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 3; r1 = r1 >> 3; - *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; - *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000); + *(uint16_t*)(dst_argb1555 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000); src_y += 2; src_u += 1; src_v += 1; @@ -2333,7 +2488,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; - *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; + *(uint16_t*)(dst_argb1555) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000); } } @@ -2359,8 +2515,10 @@ void I422ToRGB565Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); // for ubsan - *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11); + *(uint16_t*)(dst_rgb565 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); + *(uint16_t*)(dst_rgb565 + 2) = + STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11)); src_y += 2; src_u += 1; src_v += 1; @@ -2371,7 +2529,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565 + 0) = + STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11)); } } @@ -2486,8 +2645,12 @@ void NV12ToRGB565Row_C(const uint8_t* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); - *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11); + *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) | + STATIC_CAST(uint16_t, g0 << 5) | + STATIC_CAST(uint16_t, r0 << 11); + *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) | + STATIC_CAST(uint16_t, g1 << 5) | + STATIC_CAST(uint16_t, r1 << 11); 
src_y += 2; src_uv += 2; dst_rgb565 += 4; // Advance 2 pixels. @@ -2497,7 +2660,9 @@ void NV12ToRGB565Row_C(const uint8_t* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) | + STATIC_CAST(uint16_t, g0 << 5) | + STATIC_CAST(uint16_t, r0 << 11); } } @@ -2603,6 +2768,19 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { } } +void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) { + int x; + src += width - 1; + for (x = 0; x < width - 1; x += 2) { + dst[x] = src[0]; + dst[x + 1] = src[-1]; + src -= 2; + } + if (width & 1) { + dst[width - 1] = src[0]; + } +} + void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) { int x; src_uv += (width - 1) << 1; @@ -2714,6 +2892,21 @@ void DetileRow_C(const uint8_t* src, } } +void DetileRow_16_C(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + int x; + for (x = 0; x < width - 15; x += 16) { + memcpy(dst, src, 16 * sizeof(uint16_t)); + dst += 16; + src += src_tile_stride; + } + if (width & 15) { + memcpy(dst, src, (width & 15) * sizeof(uint16_t)); + } +} + void DetileSplitUVRow_C(const uint8_t* src_uv, ptrdiff_t src_tile_stride, uint8_t* dst_u, @@ -2731,6 +2924,51 @@ void DetileSplitUVRow_C(const uint8_t* src_uv, } } +void DetileToYUY2_C(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + for (int x = 0; x < width - 15; x += 16) { + for (int i = 0; i < 8; i++) { + dst_yuy2[0] = src_y[0]; + dst_yuy2[1] = src_uv[0]; + dst_yuy2[2] = src_y[1]; + dst_yuy2[3] = src_uv[1]; + dst_yuy2 += 4; + src_y += 2; + src_uv += 2; + } + src_y += src_y_tile_stride - 16; + src_uv += src_uv_tile_stride - 16; + } +} + +// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded +// in 80 byte blocks representing 64 pixels each. 
The first 16 bytes of the +// block contain all of the lower 2 bits of each pixel packed together, and the +// next 64 bytes represent all the upper 8 bits of the pixel. The lower bits are +// packed into 1x4 blocks, whereas the upper bits are packed in normal raster +// order. +void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) { + for (size_t i = 0; i < size; i += 80) { + const uint8_t* src_lower_bits = src; + const uint8_t* src_upper_bits = src + 16; + + for (int j = 0; j < 4; j++) { + for (int k = 0; k < 16; k++) { + *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 | + (uint16_t)*src_upper_bits << 8 | + (uint16_t)*src_upper_bits >> 2; + src_upper_bits++; + } + } + + src += 80; + } +} + void SplitRGBRow_C(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, @@ -2823,10 +3061,10 @@ void MergeAR64Row_C(const uint16_t* src_r, int shift = 16 - depth; int max = (1 << depth) - 1; for (x = 0; x < width; ++x) { - dst_ar64[0] = ClampMax(src_b[x], max) << shift; - dst_ar64[1] = ClampMax(src_g[x], max) << shift; - dst_ar64[2] = ClampMax(src_r[x], max) << shift; - dst_ar64[3] = ClampMax(src_a[x], max) << shift; + dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift); + dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift); + dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift); + dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift); dst_ar64 += 4; } } @@ -2843,10 +3081,10 @@ void MergeARGB16To8Row_C(const uint16_t* src_r, int x; int shift = depth - 8; for (x = 0; x < width; ++x) { - dst_argb[0] = clamp255(src_b[x] >> shift); - dst_argb[1] = clamp255(src_g[x] >> shift); - dst_argb[2] = clamp255(src_r[x] >> shift); - dst_argb[3] = clamp255(src_a[x] >> shift); + dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift)); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift)); + dst_argb[3] = 
STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift)); dst_argb += 4; } } @@ -2863,9 +3101,9 @@ void MergeXR64Row_C(const uint16_t* src_r, int shift = 16 - depth; int max = (1 << depth) - 1; for (x = 0; x < width; ++x) { - dst_ar64[0] = ClampMax(src_b[x], max) << shift; - dst_ar64[1] = ClampMax(src_g[x], max) << shift; - dst_ar64[2] = ClampMax(src_r[x], max) << shift; + dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift); + dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift); + dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift); dst_ar64[3] = 0xffff; dst_ar64 += 4; } @@ -2882,9 +3120,9 @@ void MergeXRGB16To8Row_C(const uint16_t* src_r, int x; int shift = depth - 8; for (x = 0; x < width; ++x) { - dst_argb[0] = clamp255(src_b[x] >> shift); - dst_argb[1] = clamp255(src_g[x] >> shift); - dst_argb[2] = clamp255(src_r[x] >> shift); + dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift)); + dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift)); + dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift)); dst_argb[3] = 0xff; dst_argb += 4; } @@ -2930,8 +3168,8 @@ void MergeUVRow_16_C(const uint16_t* src_u, assert(depth <= 16); int x; for (x = 0; x < width; ++x) { - dst_uv[0] = src_u[x] << shift; - dst_uv[1] = src_v[x] << shift; + dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift); + dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift); dst_uv += 2; } } @@ -2959,7 +3197,7 @@ void MultiplyRow_16_C(const uint16_t* src_y, int width) { int x; for (x = 0; x < width; ++x) { - dst_y[x] = src_y[x] * scale; + dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale); } } @@ -2990,7 +3228,7 @@ void Convert16To8Row_C(const uint16_t* src_y, assert(scale <= 32768); for (x = 0; x < width; ++x) { - dst_y[x] = C16TO8(src_y[x], scale); + dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale)); } } @@ -3043,6 +3281,21 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2, } } +// Filter 2 rows of YUY2 UV's (422) into UV 
(NV12). +void YUY2ToNVUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_uv, + int width) { + // Output a row of UV values, filtering 2 rows of YUY2. + int x; + for (x = 0; x < width; x += 2) { + dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; + dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; + src_yuy2 += 4; + dst_uv += 2; + } +} + // Copy row of YUY2 UV's (422) into U and V (422). void YUY2ToUV422Row_C(const uint8_t* src_yuy2, uint8_t* dst_u, @@ -3138,9 +3391,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); + dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[3] = 255u; fb = src_argb[4 + 0]; @@ -3150,9 +3403,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, bb = src_argb1[4 + 0]; bg = src_argb1[4 + 1]; br = src_argb1[4 + 2]; - dst_argb[4 + 0] = BLEND(fb, bb, a); - dst_argb[4 + 1] = BLEND(fg, bg, a); - dst_argb[4 + 2] = BLEND(fr, br, a); + dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[4 + 3] = 255u; src_argb += 8; src_argb1 += 8; @@ -3167,9 +3420,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb, uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); + dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a)); + dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a)); + dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a)); dst_argb[3] = 255u; } } @@ -3196,12 +3449,7 @@ void BlendPlaneRow_C(const uint8_t* src0, } #undef UBLEND -#if 
LIBYUV_ATTENUATE_DUP -// This code mimics the SSSE3 version for better testability. -#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 -#else -#define ATTENUATE(f, a) (f * a + 128) >> 8 -#endif +#define ATTENUATE(f, a) (f * a + 255) >> 8 // Multiply source RGB by alpha and store to destination. void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { @@ -3214,7 +3462,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; + dst_argb[3] = STATIC_CAST(uint8_t, a); b = src_argb[4]; g = src_argb[5]; r = src_argb[6]; @@ -3222,7 +3470,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[4] = ATTENUATE(b, a); dst_argb[5] = ATTENUATE(g, a); dst_argb[6] = ATTENUATE(r, a); - dst_argb[7] = a; + dst_argb[7] = STATIC_CAST(uint8_t, a); src_argb += 8; dst_argb += 8; } @@ -3235,7 +3483,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; + dst_argb[3] = STATIC_CAST(uint8_t, a); } } #undef ATTENUATE @@ -3307,10 +3555,10 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb, const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point // Clamping should not be necessary but is free in assembly. 
- dst_argb[0] = UNATTENUATE(b, ia); - dst_argb[1] = UNATTENUATE(g, ia); - dst_argb[2] = UNATTENUATE(r, ia); - dst_argb[3] = a; + dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia)); + dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia)); + dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia)); + dst_argb[3] = STATIC_CAST(uint8_t, a); src_argb += 4; dst_argb += 4; } @@ -3344,12 +3592,20 @@ void CumulativeSumToAverageRow_C(const int32_t* tl, int i; assert(area != 0); - ooa = 1.0f / area; + ooa = 1.0f / STATIC_CAST(float, area); for (i = 0; i < count; ++i) { - dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); - dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); - dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); - dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); + dst[0] = + (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * + ooa); + dst[1] = + (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * + ooa); + dst[2] = + (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * + ooa); + dst[3] = + (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * + ooa); dst += 4; tl += 4; bl += 4; @@ -3407,7 +3663,9 @@ static void HalfRow_16To8_C(const uint16_t* src_uv, int width) { int x; for (x = 0; x < width; ++x) { - dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale); + dst_uv[x] = STATIC_CAST( + uint8_t, + C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale)); } } @@ -3433,8 +3691,9 @@ void InterpolateRow_C(uint8_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; + dst_ptr[0] = STATIC_CAST( + uint8_t, + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8); ++src_ptr; ++src_ptr1; ++dst_ptr; @@ -3463,8 +3722,9 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = 
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; + dst_ptr[0] = STATIC_CAST( + uint16_t, + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8); ++src_ptr; ++src_ptr1; ++dst_ptr; @@ -3501,9 +3761,11 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr, return; } for (x = 0; x < width; ++x) { - dst_ptr[0] = C16TO8( - (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8, - scale); + dst_ptr[0] = STATIC_CAST( + uint8_t, + C16TO8( + (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8, + scale)); src_ptr += 1; src_ptr1 += 1; dst_ptr += 1; @@ -3615,10 +3877,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb, dr += poly[14] * r3; da += poly[15] * a3; - dst_argb[0] = Clamp((int32_t)(db)); - dst_argb[1] = Clamp((int32_t)(dg)); - dst_argb[2] = Clamp((int32_t)(dr)); - dst_argb[3] = Clamp((int32_t)(da)); + dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db))); + dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg))); + dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr))); + dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da))); src_argb += 4; dst_argb += 4; } @@ -4023,6 +4285,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, } #endif +#if defined(HAS_I444TORGB24ROW_AVX2) +void I444ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif + src_y += twidth; + src_u += twidth; + src_v += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, @@ -4164,8 +4452,9 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) { void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) { int i; for (i = 0; i < width; ++i) { - *dst++ = - (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8; + *dst++ = STATIC_CAST( + uint16_t, + (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8); ++src; } } @@ -4325,6 +4614,8 @@ void HalfMergeUVRow_C(const uint8_t* src_u, } } +#undef STATIC_CAST + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/source/row_gcc.cc b/source/row_gcc.cc index dce8c439..d8074987 100644 --- a/files/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -27,6 +27,9 @@ static const uvec8 kARGBToY = {25u, 129u, 66u, 0u, 25u, 129u, 66u, 0u, static const uvec8 kARGBToYJ = {29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u}; +static const uvec8 kABGRToYJ = {77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u, + 77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u}; + static const uvec8 kRGBAToYJ = {0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) @@ -39,12 +42,18 @@ static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0}; +static const vec8 kABGRToUJ = {-43, -84, 127, 0, -43, -84, 127, 0, + -43, -84, 127, 0, -43, -84, 127, 0}; + static const vec8 kARGBToV = {-18, 
-94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0}; static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0}; +static const vec8 kABGRToVJ = {127, -107, -20, 0, 127, -107, -20, 0, + 127, -107, -20, 0, 127, -107, -20, 0}; + // Constants for BGRA static const uvec8 kBGRAToY = {0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u}; @@ -729,7 +738,7 @@ void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "movd %3,%%xmm6 \n" @@ -777,7 +786,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "vbroadcastss %3,%%xmm6 \n" @@ -1201,6 +1210,7 @@ void ARGBToAR64Row_AVX2(const uint8_t* src_argb, "lea 0x40(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" + "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_ar64), // %1 "+r"(width) // %2 @@ -1228,6 +1238,7 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb, "lea 0x40(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" + "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_ab64), // %1 "+r"(width) // %2 @@ -1256,6 +1267,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64, "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" + "vzeroupper \n" : "+r"(src_ar64), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -1284,6 +1296,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" + "vzeroupper \n" : "+r"(src_ab64), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -1398,6 +1411,24 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { } #endif // HAS_ARGBTOYJROW_SSSE3 +#ifdef HAS_ABGRTOYJROW_SSSE3 +// Convert 16 ABGR pixels (64 bytes) to 16 YJ values. 
+// Same as ABGRToYRow but different coefficients, no add 16. +void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" + + LABELALIGN RGBTOY(xmm5) + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kABGRToYJ), // %3 + "m"(kSub128) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} +#endif // HAS_ABGRTOYJROW_SSSE3 + #ifdef HAS_RGBATOYJROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16. @@ -1416,7 +1447,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) { } #endif // HAS_RGBATOYJROW_SSSE3 -#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ARGBEXTRACTALPHAROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ABGRTOYROW_AVX2) || \ + defined(HAS_ARGBEXTRACTALPHAROW_AVX2) // vpermd for vphaddw + vpackuswb vpermd. static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; #endif @@ -1429,9 +1461,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { "vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" "vbroadcastf128 %5,%%ymm7 \n" - "vmovdqu %6,%%ymm6 \n" - - LABELALIGN RGBTOY_AVX2(ymm7) + "vmovdqu %6,%%ymm6 \n" LABELALIGN RGBTOY_AVX2( + ymm7) "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1451,9 +1482,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) { "vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" "vbroadcastf128 %5,%%ymm7 \n" - "vmovdqu %6,%%ymm6 \n" - - LABELALIGN RGBTOY_AVX2(ymm7) + "vmovdqu %6,%%ymm6 \n" LABELALIGN RGBTOY_AVX2( + ymm7) "vzeroupper \n" : "+r"(src_abgr), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1472,9 +1502,8 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" - "vmovdqu %5,%%ymm6 \n" - - 
LABELALIGN RGBTOY_AVX2(ymm5) + "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2( + ymm5) "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1486,15 +1515,32 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { } #endif // HAS_ARGBTOYJROW_AVX2 +#ifdef HAS_ABGRTOYJROW_AVX2 +// Convert 32 ABGR pixels (128 bytes) to 32 Y values. +void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" + "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2( + ymm5) "vzeroupper \n" + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kABGRToYJ), // %3 + "m"(kSub128), // %4 + "m"(kPermdARGBToY_AVX) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} +#endif // HAS_ABGRTOYJROW_AVX2 + #ifdef HAS_RGBATOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) { asm volatile( "vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" - "vmovdqu %5,%%ymm6 \n" - - LABELALIGN RGBTOY_AVX2( + "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2( ymm5) "vzeroupper \n" : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 @@ -1571,11 +1617,15 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb, } #endif // HAS_ARGBTOUVROW_SSSE3 -#ifdef HAS_ARGBTOUVROW_AVX2 +#if defined(HAS_ARGBTOUVROW_AVX2) || defined(HAS_ABGRTOUVROW_AVX2) || \ + defined(HAS_ARGBTOUVJROW_AVX2) || defined(HAS_ABGRTOUVJROW_AVX2) // vpshufb for vphaddw + vpackuswb packed to shorts. 
static const lvec8 kShufARGBToUV_AVX = { 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; +#endif + +#if defined(HAS_ARGBTOUVROW_AVX2) void ARGBToUVRow_AVX2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -1765,6 +1815,71 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb, } #endif // HAS_ARGBTOUVJROW_AVX2 +// TODO(fbarchard): Pass kABGRToVJ / kABGRToUJ as matrix +#ifdef HAS_ABGRTOUVJROW_AVX2 +void ABGRToUVJRow_AVX2(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vbroadcastf128 %5,%%ymm5 \n" + "vbroadcastf128 %6,%%ymm6 \n" + "vbroadcastf128 %7,%%ymm7 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" + "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" + "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" + "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" + "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" + "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" + "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" + + "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" + "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" + "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" + "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm0,%%ymm0 \n" + "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpshufb %8,%%ymm0,%%ymm0 \n" + + "vextractf128 $0x0,%%ymm0,(%1) \n" + "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_abgr), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + 
"+rm"(width) // %3 + : "r"((intptr_t)(src_stride_abgr)), // %4 + "m"(kSub128), // %5 + "m"(kABGRToVJ), // %6 + "m"(kABGRToUJ), // %7 + "m"(kShufARGBToUV_AVX) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} +#endif // HAS_ABGRTOUVJROW_AVX2 + #ifdef HAS_ARGBTOUVJROW_SSSE3 void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, int src_stride_argb, @@ -1831,6 +1946,72 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, } #endif // HAS_ARGBTOUVJROW_SSSE3 +#ifdef HAS_ABGRTOUVJROW_SSSE3 +void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "paddw %%xmm5,%%xmm0 \n" + "paddw %%xmm5,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_abgr), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : 
"r"((intptr_t)(src_stride_abgr)), // %4 + "m"(kABGRToVJ), // %5 + "m"(kABGRToUJ), // %6 + "m"(kSub128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); +} +#endif // HAS_ABGRTOUVJROW_SSSE3 + #ifdef HAS_ARGBTOUV444ROW_SSSE3 void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, @@ -2153,9 +2334,6 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422 10 bit, upsample to 8 UV -// TODO(fbarchard): Consider shufb to replace pack/unpack -// TODO(fbarchard): Consider pmulhuw to replace psraw -// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits. #define READYUV210 \ "movq (%[u_buf]),%%xmm3 \n" \ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ @@ -2165,7 +2343,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "packuswb %%xmm3,%%xmm3 \n" \ "punpcklwd %%xmm3,%%xmm3 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ + "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ + "psrlw $4,%%xmm2 \n" \ + "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" #define READYUVA210 \ @@ -2177,7 +2358,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "packuswb %%xmm3,%%xmm3 \n" \ "punpcklwd %%xmm3,%%xmm3 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ + "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ + "psrlw $4,%%xmm2 \n" \ + "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" \ "movdqu (%[a_buf]),%%xmm5 \n" \ "psraw $2,%%xmm5 \n" \ @@ -2196,7 +2380,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "punpckhwd %%xmm2,%%xmm1 \n" \ "packuswb %%xmm1,%%xmm3 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ + "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ + "psrlw $4,%%xmm2 \n" \ + "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 UV from 444 10 bit. With 8 Alpha. 
@@ -2211,7 +2398,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "punpckhwd %%xmm2,%%xmm1 \n" \ "packuswb %%xmm1,%%xmm3 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ - "psllw $0x6,%%xmm4 \n" \ + "movdqa %%xmm4,%%xmm2 \n" \ + "psllw $6,%%xmm4 \n" \ + "psrlw $4,%%xmm2 \n" \ + "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" \ "movdqu (%[a_buf]),%%xmm5 \n" \ "psraw $2,%%xmm5 \n" \ @@ -2228,7 +2418,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "packuswb %%xmm3,%%xmm3 \n" \ "punpcklwd %%xmm3,%%xmm3 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ - "psllw $0x4,%%xmm4 \n" \ + "movdqa %%xmm4,%%xmm2 \n" \ + "psllw $4,%%xmm4 \n" \ + "psrlw $8,%%xmm2 \n" \ + "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. @@ -2399,6 +2592,20 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \ "lea 0x20(%[dst_rgba]),%[dst_rgba] \n" +// Store 8 RGB24 values. +#define STORERGB24 \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklbw %%xmm2,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm1 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm1 \n" \ + "pshufb %%xmm5,%%xmm0 \n" \ + "pshufb %%xmm6,%%xmm1 \n" \ + "palignr $0xc,%%xmm0,%%xmm1 \n" \ + "movq %%xmm0,(%[dst_rgb24]) \n" \ + "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" \ + "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" + // Store 8 AR30 values. 
#define STOREAR30 \ "psraw $0x4,%%xmm0 \n" \ @@ -2508,17 +2715,43 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, "1: \n" READYUV422 YUVTORGB(yuvconstants) - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0,(%[dst_rgb24]) \n" - "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" - "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" + STORERGB24 + "subl $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] +#if defined(__i386__) + [width]"+m"(width) // %[width] +#else + [width]"+rm"(width) // %[width] +#endif + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] + [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), + [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" + ); +} + +void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" + "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" + "sub %[u_buf],%[v_buf] \n" + + LABELALIGN + "1: \n" + READYUV444 + YUVTORGB(yuvconstants) + STORERGB24 "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] @@ -3209,7 +3442,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \ "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ - "vpsllw $6,%%ymm4,%%ymm4 \n" \ + "vpsllw $6,%%ymm4,%%ymm2 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ + "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" // Read 8 UV from 210, upsample to 
16 UV. With 16 Alpha. @@ -3224,7 +3459,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \ "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ - "vpsllw $6,%%ymm4,%%ymm4 \n" \ + "vpsllw $6,%%ymm4,%%ymm2 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ + "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" \ "vmovdqu (%[a_buf]),%%ymm5 \n" \ "vpsraw $2,%%ymm5,%%ymm5 \n" \ @@ -3242,7 +3479,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \ "vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ - "vpsllw $6,%%ymm4,%%ymm4 \n" \ + "vpsllw $6,%%ymm4,%%ymm2 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ + "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" // Read 8 UV from 212 12 bit, upsample to 16 UV @@ -3257,7 +3496,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \ "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ - "vpsllw $0x4,%%ymm4,%%ymm4 \n" \ + "vpsllw $4,%%ymm4,%%ymm2 \n" \ + "vpsrlw $8,%%ymm4,%%ymm4 \n" \ + "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" // Read 16 UV from 410. With 16 Alpha. 
@@ -3271,7 +3512,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \ "vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ - "vpsllw $6,%%ymm4,%%ymm4 \n" \ + "vpsllw $6,%%ymm4,%%ymm2 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ + "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" \ "vmovdqu (%[a_buf]),%%ymm5 \n" \ "vpsraw $2,%%ymm5,%%ymm5 \n" \ @@ -4785,6 +5028,84 @@ void DetileRow_SSE2(const uint8_t* src, } #endif // HAS_DETILEROW_SSE2 +#ifdef HAS_DETILEROW_16_SSE2 +void DetileRow_16_SSE2(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea (%0,%3,2),%0 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride) // %3 + : "cc", "memory", "xmm0", "xmm1"); +} +#endif // HAS_DETILEROW_SSE2 + +#ifdef HAS_DETILEROW_16_AVX +void DetileRow_16_AVX(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "lea (%0,%3,2),%0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride) // %3 + : "cc", "memory", "xmm0"); +} +#endif // HAS_DETILEROW_AVX + +#ifdef HAS_DETILETOYUY2_SSE2 +// Read 16 Y, 8 UV, and write 8 YUYV. 
+void DetileToYUY2_SSE2(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "movdqu (%0),%%xmm0 \n" // Load 16 Y + "sub $0x10,%3 \n" + "lea (%0,%4),%0 \n" + "movdqu (%1),%%xmm1 \n" // Load 8 UV + "lea (%1,%5),%1 \n" + "movdqu %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm2 \n" + "movdqu %%xmm0,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "lea 0x20(%2),%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 + : "cc", "memory", "xmm0", "xmm1", "xmm2" // Clobber list + ); +} +#endif + #ifdef HAS_DETILESPLITUVROW_SSSE3 // TODO(greenjustin): Look into generating these constants instead of loading // them since this can cause branch mispredicts for fPIC code on 32-bit @@ -4821,36 +5142,59 @@ void DetileSplitUVRow_SSSE3(const uint8_t* src_uv, } #endif // HAS_DETILESPLITUVROW_SSSE3 +#ifdef HAS_MERGEUVROW_AVX512BW +void MergeUVRow_AVX512BW(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width) { + asm volatile("sub %0,%1 \n" + + LABELALIGN + "1: \n" + "vpmovzxbw (%0),%%zmm0 \n" + "vpmovzxbw 0x00(%0,%1,1),%%zmm1 \n" + "lea 0x20(%0),%0 \n" + "vpsllw $0x8,%%zmm1,%%zmm1 \n" + "vporq %%zmm0,%%zmm1,%%zmm2 \n" + "vmovdqu64 %%zmm2,(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_MERGEUVROW_AVX512BW + #ifdef HAS_MERGEUVROW_AVX2 void MergeUVRow_AVX2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { - asm volatile( - - "sub %0,%1 \n" + asm volatile("sub %0,%1 \n" - LABELALIGN + LABELALIGN "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vmovdqu 0x00(%0,%1,1),%%ymm1 \n" - "lea 0x20(%0),%0 \n" - "vpunpcklbw 
%%ymm1,%%ymm0,%%ymm2 \n" - "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm2,(%2) \n" - "vextractf128 $0x0,%%ymm0,0x10(%2) \n" - "vextractf128 $0x1,%%ymm2,0x20(%2) \n" - "vextractf128 $0x1,%%ymm0,0x30(%2) \n" - "lea 0x40(%2),%2 \n" - "sub $0x20,%3 \n" + "vpmovzxbw (%0),%%ymm0 \n" + "vpmovzxbw 0x00(%0,%1,1),%%ymm1 \n" + "lea 0x10(%0),%0 \n" + "vpsllw $0x8,%%ymm1,%%ymm1 \n" + "vpor %%ymm0,%%ymm1,%%ymm2 \n" + "vmovdqu %%ymm2,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x10,%3 \n" "jg 1b \n" "vzeroupper \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc", "xmm0", "xmm1", "xmm2"); + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_AVX2 @@ -4859,11 +5203,9 @@ void MergeUVRow_SSE2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { - asm volatile( + asm volatile("sub %0,%1 \n" - "sub %0,%1 \n" - - LABELALIGN + LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%1,1),%%xmm1 \n" @@ -4876,12 +5218,12 @@ void MergeUVRow_SSE2(const uint8_t* src_u, "lea 0x20(%2),%2 \n" "sub $0x10,%3 \n" "jg 1b \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc", "xmm0", "xmm1", "xmm2"); + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_SSE2 @@ -4891,37 +5233,35 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u, uint16_t* dst_uv, int depth, int width) { - depth = 16 - depth; // clang-format off asm volatile ( "vmovd %4,%%xmm3 \n" + "vmovd %5,%%xmm4 \n" + + "sub %0,%1 \n" + // 8 pixels per loop. - // 16 pixels per loop. 
- LABELALIGN + LABELALIGN "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vmovdqu (%0,%1,1),%%ymm1 \n" - "add $0x20,%0 \n" - + "vpmovzxwd (%0),%%ymm0 \n" + "vpmovzxwd 0x00(%0,%1,1),%%ymm1 \n" + "lea 0x10(%0),%0 \n" "vpsllw %%xmm3,%%ymm0,%%ymm0 \n" - "vpsllw %%xmm3,%%ymm1,%%ymm1 \n" - "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates - "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm2,(%2) \n" - "vextractf128 $0x0,%%ymm0,0x10(%2) \n" - "vextractf128 $0x1,%%ymm2,0x20(%2) \n" - "vextractf128 $0x1,%%ymm0,0x30(%2) \n" - "add $0x40,%2 \n" - "sub $0x10,%3 \n" + "vpslld %%xmm4,%%ymm1,%%ymm1 \n" + "vpor %%ymm0,%%ymm1,%%ymm2 \n" + "vmovdqu %%ymm2,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x8,%3 \n" "jg 1b \n" "vzeroupper \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : "r"(depth) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : "r"(16 - depth), // %4 + "r"(32 - depth) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); // clang-format on } #endif // HAS_MERGEUVROW_AVX2 @@ -5127,7 +5467,6 @@ void Convert16To8Row_AVX2(const uint16_t* src_y, // 512 = 9 bits // 1024 = 10 bits // 4096 = 12 bits -// TODO(fbarchard): reduce to SSE2 void Convert8To16Row_SSE2(const uint8_t* src_y, uint16_t* dst_y, int scale, @@ -6178,6 +6517,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) { "lea 0x40(%1),%1 \n" "sub $0x40,%2 \n" "jg 1b \n" + "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 @@ -6461,6 +6801,33 @@ void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { : "memory", "cc", "xmm0", "xmm1", "xmm5"); } +void YUY2ToNVUVRow_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width) { + asm volatile(LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 
0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_uv), // %1 + "+r"(width) // %2 + : "r"((intptr_t)(stride_yuy2)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); +} + void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, @@ -6661,6 +7028,35 @@ void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { : "memory", "cc", "xmm0", "xmm1", "xmm5"); } +void YUY2ToNVUVRow_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width) { + asm volatile( + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpavgb 0x00(%0,%3,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%3,1),%%ymm1,%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_uv), // %1 + "+r"(width) // %2 + : "r"((intptr_t)(stride_yuy2)) // %3 + : "memory", "cc", "xmm0", "xmm1"); +} + void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, @@ -7045,93 +7441,106 @@ void BlendPlaneRow_AVX2(const uint8_t* src0, #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha. -static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, - 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u}; -static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; +static const vec8 kAttenuateShuffle = {6, -128, 6, -128, 6, -128, + -128, -128, 14, -128, 14, -128, + 14, -128, -128, -128}; + // Attenuate 4 pixels at a time. 
void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( - "pcmpeqb %%xmm3,%%xmm3 \n" - "pslld $0x18,%%xmm3 \n" "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" + "pxor %%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "punpcklbw %%xmm6,%%xmm7 \n" + "sub %0,%1 \n" // 4 pixel loop. LABELALIGN "1: \n" - "movdqu (%0),%%xmm0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu (%0),%%xmm1 \n" - "punpcklbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm1,%%xmm0 \n" - "movdqu (%0),%%xmm1 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu (%0),%%xmm2 \n" - "punpckhbw %%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "movdqu (%0),%%xmm2 \n" - "lea 0x10(%0),%0 \n" - "pand %%xmm3,%%xmm2 \n" + "movdqu (%0),%%xmm6 \n" + "movdqa %%xmm6,%%xmm0 \n" + "movdqa %%xmm6,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpckhbw %%xmm5,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "pshufb %%xmm4,%%xmm2 \n" // a,a,a,0 + "pshufb %%xmm4,%%xmm3 \n" + "pmullw %%xmm2,%%xmm0 \n" // rgb * alpha + "pmullw %%xmm3,%%xmm1 \n" + "paddw %%xmm7,%%xmm0 \n" // + 255 + "paddw %%xmm7,%%xmm1 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0,(%1) \n" - "lea 0x10(%1),%1 \n" + "pand %%xmm5,%%xmm6 \n" + "por %%xmm6,%%xmm0 \n" + "movdqu %%xmm0,(%0,%1) \n" + "lea 0x10(%0),%0 \n" "sub $0x4,%2 \n" "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha0), // %3 - "m"(kShuffleAlpha1) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kAttenuateShuffle) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_AVX2 + // Shuffle table duplicating alpha. 
-static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, - 128u, 128u, 14u, 15u, 14u, 15u, - 14u, 15u, 128u, 128u}; +static const lvec8 kAttenuateShuffle_AVX2 = { + 6, -128, 6, -128, 6, -128, -128, -128, 14, -128, 14, + -128, 14, -128, -128, -128, 22, -128, 22, -128, 22, -128, + -128, -128, 30, -128, 30, -128, 30, -128, -128, -128}; + // Attenuate 8 pixels at a time. void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( - "vbroadcastf128 %3,%%ymm4 \n" + "vmovdqa %3,%%ymm4 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpslld $0x18,%%ymm5,%%ymm5 \n" + "vpxor %%ymm6,%%ymm6,%%ymm6 \n" + "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" + "vpunpcklbw %%ymm6,%%ymm7,%%ymm7 \n" "sub %0,%1 \n" // 8 pixel loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm6 \n" - "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" - "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" + "vpunpcklbw %%ymm5,%%ymm6,%%ymm0 \n" + "vpunpckhbw %%ymm5,%%ymm6,%%ymm1 \n" "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpand %%ymm5,%%ymm6,%%ymm6 \n" + "vpmullw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" + "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm7,%%ymm1,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpor %%ymm6,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm6,%%ymm1 \n" + "vpor %%ymm1,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,0x00(%0,%1,1) \n" "lea 0x20(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha_AVX2) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kAttenuateShuffle_AVX2) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBATTENUATEROW_AVX2 diff --git 
a/files/source/row_lasx.cc b/source/row_lasx.cc index 7dd18f40..be85022e 100644 --- a/files/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, - 0xF000F000F000F000}; + __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000, + 0xF000F000F000F000, 0xF000F000F000F000}; __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0}; @@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000, - 0x8000800080008000}; + __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000, + 0x8000800080008000, 0x8000800080008000}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); @@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy, } } -void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) { - int x; - int len = width / 32; - __m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3; - __m256i tmp0, tmp1, dst0; - __m256i const_19 = __lasx_xvldi(0x19); - __m256i const_42 = __lasx_xvldi(0x42); - __m256i const_81 = __lasx_xvldi(0x81); - __m256i const_1080 = {0x1080108010801080, 0x1080108010801080, - 0x1080108010801080, 0x1080108010801080}; - __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, - 0x0000000700000003}; - - for (x = 0; x < len; x++) { - DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, - src_argb0, 96, src0, src1, src2, src3); - vec0 = __lasx_xvpickev_b(src1, src0); - vec1 = 
__lasx_xvpickev_b(src3, src2); - vec2 = __lasx_xvpickod_b(src1, src0); - vec3 = __lasx_xvpickod_b(src3, src2); - tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19); - tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19); - tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81); - tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81); - tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42); - tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42); - dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8); - dst0 = __lasx_xvperm_w(dst0, control); - __lasx_xvst(dst0, dst_y, 0); - src_argb0 += 128; - dst_y += 32; - } -} - void ARGBToUVRow_LASX(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, @@ -833,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, 0x0009000900090009, 0x0009000900090009}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; - __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, @@ -1071,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, __m256i const_38 = __lasx_xvldi(38); __m256i const_94 = __lasx_xvldi(94); __m256i const_18 = __lasx_xvldi(18); - __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; for (x = 0; x < len; x++) { @@ -1216,7 +1182,7 @@ void ARGBAttenuateRow_LASX(const uint8_t* src_argb, void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { int x; int len = 
width / 16; @@ -1643,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, @@ -1760,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, @@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, } } -void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { - int x; - int len = width / 32; - __m256i src0, src1, src2; - __m256i tmp0, tmp1, tmp2, tmp3; - __m256i reg0, reg1, reg2, dst0; - __m256i const_129 = __lasx_xvldi(129); - __m256i const_br = {0x4219421942194219, 0x4219421942194219, - 0x4219421942194219, 0x4219421942194219}; - __m256i const_1080 = {0x1080108010801080, 0x1080108010801080, - 0x1080108010801080, 0x1080108010801080}; - __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200, - 0x17151412110F0E0C}; - __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18, - 0x0F0D0C0A09070604}; - __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001, - 0x001600130010000D}; - __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019, - 
0x000E000B00080005}; - - for (x = 0; x < len; x++) { - reg0 = __lasx_xvld(src_rgb24, 0); - reg1 = __lasx_xvld(src_rgb24, 32); - reg2 = __lasx_xvld(src_rgb24, 64); - src0 = __lasx_xvpermi_q(reg1, reg0, 0x30); - src1 = __lasx_xvpermi_q(reg2, reg0, 0x21); - src2 = __lasx_xvpermi_q(reg2, reg1, 0x30); - tmp0 = __lasx_xvshuf_b(src1, src0, shuff0); - tmp1 = __lasx_xvshuf_b(src1, src2, shuff1); - tmp2 = __lasx_xvshuf_b(src1, src0, shuff2); - tmp3 = __lasx_xvshuf_b(src1, src2, shuff3); - reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129); - reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129); - reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1); - dst0 = __lasx_xvpickod_b(reg1, reg0); - __lasx_xvst(dst0, dst_y, 0); - dst_y += 32; - src_rgb24 += 96; - } -} - void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, @@ -1869,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, @@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, } } -void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) { - int x; - int len = width / 32; - __m256i src0, src1, src2; - __m256i tmp0, tmp1, tmp2, tmp3; - __m256i reg0, reg1, reg2, dst0; - __m256i const_129 = __lasx_xvldi(129); - __m256i const_br = {0x1942194219421942, 0x1942194219421942, - 0x1942194219421942, 0x1942194219421942}; - __m256i const_1080 = {0x1080108010801080, 0x1080108010801080, - 
0x1080108010801080, 0x1080108010801080}; - __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200, - 0x17151412110F0E0C}; - __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18, - 0x0F0D0C0A09070604}; - __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001, - 0x001600130010000D}; - __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019, - 0x000E000B00080005}; - - for (x = 0; x < len; x++) { - reg0 = __lasx_xvld(src_raw, 0); - reg1 = __lasx_xvld(src_raw, 32); - reg2 = __lasx_xvld(src_raw, 64); - src0 = __lasx_xvpermi_q(reg1, reg0, 0x30); - src1 = __lasx_xvpermi_q(reg2, reg0, 0x21); - src2 = __lasx_xvpermi_q(reg2, reg1, 0x30); - tmp0 = __lasx_xvshuf_b(src1, src0, shuff0); - tmp1 = __lasx_xvshuf_b(src1, src2, shuff1); - tmp2 = __lasx_xvshuf_b(src1, src0, shuff2); - tmp3 = __lasx_xvshuf_b(src1, src2, shuff3); - reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129); - reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129); - reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1); - dst0 = __lasx_xvpickod_b(reg1, reg0); - __lasx_xvst(dst0, dst_y, 0); - dst_y += 32; - src_raw += 96; - } -} - void RAWToUVRow_LASX(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, @@ -1974,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, __m256i const_38 = __lasx_xvldi(0x413); __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, @@ -2118,36 +2000,228 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y, } } -void 
ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - int len = width / 32; - __m256i src0, src1, src2, src3, dst0; - __m256i tmp0, tmp1, tmp2, tmp3; - __m256i reg0, reg1; - __m256i const_128 = __lasx_xvldi(0x480); - __m256i const_150 = __lasx_xvldi(0x96); - __m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D, - 0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D}; - __m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, - 0x0000000700000003}; +struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; +}; + +// RGB to JPeg coefficients +// B * 0.1140 coefficient = 29 +// G * 0.5870 coefficient = 150 +// R * 0.2990 coefficient = 77 +// Add 0.5 = 0x80 +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, + 128, + 0}; + +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; + +// RGB to BT.601 coefficients +// B * 0.1016 coefficient = 25 +// G * 0.5078 coefficient = 129 +// R * 0.2578 coefficient = 66 +// Add 16.5 = 0x1080 + +static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + 0x1080, + 0}; + +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; + +// ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7}; + asm volatile( + "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants + "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants + "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants + "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants + "xvld $xr20, %4, 0 \n\t" // load shuff + "1: \n\t" + "xvld $xr4, %0, 0 \n\t" + "xvld $xr5, %0, 32 \n\t" + "xvld $xr6, %0, 64 \n\t" + "xvld $xr7, %0, 96 \n\t" // load 32 pixels of + // ARGB + "xvor.v $xr12, $xr3, $xr3 \n\t" + "xvor.v $xr13, $xr3, $xr3 \n\t" + "addi.d %2, %2, -32 \n\t" // 32 processed per + // loop. + "xvpickev.b $xr8, $xr5, $xr4 \n\t" // BR + "xvpickev.b $xr10, $xr7, $xr6 \n\t" + "xvpickod.b $xr9, $xr5, $xr4 \n\t" // GA + "xvpickod.b $xr11, $xr7, $xr6 \n\t" + "xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" // B + "xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t" + "xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" // G + "xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t" + "xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" // R + "xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t" + "addi.d %0, %0, 128 \n\t" + "xvpickod.b $xr10, $xr13, $xr12 \n\t" + "xvperm.w $xr11, $xr10, $xr20 \n\t" + "xvst $xr11, %1, 0 \n\t" + "addi.d %1, %1, 32 \n\t" + "bnez %2, 1b \n\t" + : "+&r"(src_argb), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(rgbconstants), "r"(shuff) + : "memory"); +} - for (x = 0; x < len; x++) { - DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb, - 96, src0, src1, src2, src3); - tmp0 = __lasx_xvpickev_b(src1, src0); - tmp1 = __lasx_xvpickod_b(src1, src0); - tmp2 = __lasx_xvpickev_b(src3, src2); - tmp3 = __lasx_xvpickod_b(src3, src2); - reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150); - reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150); - reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2); - dst0 = 
__lasx_xvpickod_b(reg1, reg0); - dst0 = __lasx_xvperm_w(dst0, shuff); - __lasx_xvst(dst0, dst_y, 0); - dst_y += 32; - src_argb += 128; - } +void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) { + ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants); +} + +void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants); +} + +void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants); +} + +void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants); +} + +// RGBA expects first value to be A and ignored, then 3 values to contain RGB. +// Same code as ARGB, except the LD4 +static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7}; + asm volatile( + "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants + "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants + "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants + "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants + "xvld $xr20, %4, 0 \n\t" // load shuff + "1: \n\t" + "xvld $xr4, %0, 0 \n\t" + "xvld $xr5, %0, 32 \n\t" + "xvld $xr6, %0, 64 \n\t" + "xvld $xr7, %0, 96 \n\t" // load 32 pixels of + // RGBA + "xvor.v $xr12, $xr3, $xr3 \n\t" + "xvor.v $xr13, $xr3, $xr3 \n\t" + "addi.d %2, %2, -32 \n\t" // 32 processed per + // loop. 
+ "xvpickev.b $xr8, $xr5, $xr4 \n\t" // AG + "xvpickev.b $xr10, $xr7, $xr6 \n\t" + "xvpickod.b $xr9, $xr5, $xr4 \n\t" // BR + "xvpickod.b $xr11, $xr7, $xr6 \n\t" + "xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" // B + "xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t" + "xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" // G + "xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t" + "xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" // R + "xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t" + "addi.d %0, %0, 128 \n\t" + "xvpickod.b $xr10, $xr13, $xr12 \n\t" + "xvperm.w $xr11, $xr10, $xr20 \n\t" + "xvst $xr11, %1, 0 \n\t" + "addi.d %1, %1, 32 \n\t" + "bnez %2, 1b \n\t" + : "+&r"(src_rgba), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(rgbconstants), "r"(shuff) + : "memory"); +} + +void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants); +} + +void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { + RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants); +} + +void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants); +} + +static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + int8_t shuff[128] = { + 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, + 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, + 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, + 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, + 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, + 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, + 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0, + 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0}; + asm volatile( + "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants + "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants + "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants + "xvldrepl.h 
$xr3, %3, 4 \n\t" // load rgbconstants + "xvld $xr4, %4, 0 \n\t" // load shuff + "xvld $xr5, %4, 32 \n\t" + "xvld $xr6, %4, 64 \n\t" + "xvld $xr7, %4, 96 \n\t" + "1: \n\t" + "xvld $xr8, %0, 0 \n\t" + "xvld $xr9, %0, 32 \n\t" + "xvld $xr10, %0, 64 \n\t" // load 32 pixels of + // RGB + "xvor.v $xr12, $xr3, $xr3 \n\t" + "xvor.v $xr13, $xr3, $xr3 \n\t" + "xvor.v $xr11, $xr9, $xr9 \n\t" + "addi.d %2, %2, -32 \n\t" // 32 processed per + // loop. + "xvpermi.q $xr9, $xr8, 0x30 \n\t" // src0 + "xvpermi.q $xr8, $xr10, 0x03 \n\t" // src1 + "xvpermi.q $xr10, $xr11, 0x30 \n\t" // src2 + "xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t" + "xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t" + "xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t" + "xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t" + "xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t" // G + "xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t" + "xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t" // B + "xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t" + "xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t" // R + "xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t" + "addi.d %0, %0, 96 \n\t" + "xvpickod.b $xr10, $xr13, $xr12 \n\t" + "xvst $xr10, %1, 0 \n\t" + "addi.d %1, %1, 32 \n\t" + "bnez %2, 1b \n\t" + : "+&r"(src_rgba), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(rgbconstants), // %3 + "r"(shuff) // %4 + : "memory"); +} + +void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { + RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants); +} + +void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) { + RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants); +} + +void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants); +} + +void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants); } void ARGBToUVJRow_LASX(const uint8_t* src_argb, @@ -2168,8 +2242,8 @@ void 
ARGBToUVJRow_LASX(const uint8_t* src_argb, __m256i const_21 = __lasx_xvldi(0x415); __m256i const_53 = __lasx_xvldi(0x435); __m256i const_10 = __lasx_xvldi(0x40A); - __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, + 0x8080808080808080, 0x8080808080808080}; __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, 0x1F1D0F0D1B190B09}; diff --git a/files/source/row_lsx.cc b/source/row_lsx.cc index 3e8b901a..fa088c9e 100644 --- a/files/source/row_lsx.cc +++ b/source/row_lsx.cc @@ -31,6 +31,91 @@ extern "C" { yb = __lsx_vreplgr2vr_w(yuvconst->kYBiasToRgb[0]); \ } +// Load 32 YUV422 pixel data +#define READYUV422_D(psrc_y, psrc_u, psrc_v, out_y, uv_l, uv_h) \ + { \ + __m128i temp0, temp1; \ + \ + DUP2_ARG2(__lsx_vld, psrc_y, 0, psrc_u, 0, out_y, temp0); \ + temp1 = __lsx_vld(psrc_v, 0); \ + temp0 = __lsx_vsub_b(temp0, const_80); \ + temp1 = __lsx_vsub_b(temp1, const_80); \ + temp0 = __lsx_vsllwil_h_b(temp0, 0); \ + temp1 = __lsx_vsllwil_h_b(temp1, 0); \ + uv_l = __lsx_vilvl_h(temp0, temp1); \ + uv_h = __lsx_vilvh_h(temp0, temp1); \ + } + +// Load 16 YUV422 pixel data +#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, uv) \ + { \ + __m128i temp0, temp1; \ + \ + out_y = __lsx_vld(psrc_y, 0); \ + temp0 = __lsx_vldrepl_d(psrc_u, 0); \ + temp1 = __lsx_vldrepl_d(psrc_v, 0); \ + uv = __lsx_vilvl_b(temp0, temp1); \ + uv = __lsx_vsub_b(uv, const_80); \ + uv = __lsx_vsllwil_h_b(uv, 0); \ + } + +// Convert 16 pixels of YUV420 to RGB. 
+#define YUVTORGB_D(in_y, in_uvl, in_uvh, ubvr, ugvg, yg, yb, b_l, b_h, g_l, \ + g_h, r_l, r_h) \ + { \ + __m128i u_l, u_h, v_l, v_h; \ + __m128i yl_ev, yl_od, yh_ev, yh_od; \ + __m128i temp0, temp1, temp2, temp3; \ + \ + temp0 = __lsx_vilvl_b(in_y, in_y); \ + temp1 = __lsx_vilvh_b(in_y, in_y); \ + yl_ev = __lsx_vmulwev_w_hu_h(temp0, yg); \ + yl_od = __lsx_vmulwod_w_hu_h(temp0, yg); \ + yh_ev = __lsx_vmulwev_w_hu_h(temp1, yg); \ + yh_od = __lsx_vmulwod_w_hu_h(temp1, yg); \ + DUP4_ARG2(__lsx_vsrai_w, yl_ev, 16, yl_od, 16, yh_ev, 16, yh_od, 16, \ + yl_ev, yl_od, yh_ev, yh_od); \ + yl_ev = __lsx_vadd_w(yl_ev, yb); \ + yl_od = __lsx_vadd_w(yl_od, yb); \ + yh_ev = __lsx_vadd_w(yh_ev, yb); \ + yh_od = __lsx_vadd_w(yh_od, yb); \ + v_l = __lsx_vmulwev_w_h(in_uvl, ubvr); \ + u_l = __lsx_vmulwod_w_h(in_uvl, ubvr); \ + v_h = __lsx_vmulwev_w_h(in_uvh, ubvr); \ + u_h = __lsx_vmulwod_w_h(in_uvh, ubvr); \ + temp0 = __lsx_vadd_w(yl_ev, u_l); \ + temp1 = __lsx_vadd_w(yl_od, u_l); \ + temp2 = __lsx_vadd_w(yh_ev, u_h); \ + temp3 = __lsx_vadd_w(yh_od, u_h); \ + DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \ + temp1, temp2, temp3); \ + DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \ + temp2, temp3); \ + b_l = __lsx_vpackev_h(temp1, temp0); \ + b_h = __lsx_vpackev_h(temp3, temp2); \ + temp0 = __lsx_vadd_w(yl_ev, v_l); \ + temp1 = __lsx_vadd_w(yl_od, v_l); \ + temp2 = __lsx_vadd_w(yh_ev, v_h); \ + temp3 = __lsx_vadd_w(yh_od, v_h); \ + DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \ + temp1, temp2, temp3); \ + DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \ + temp2, temp3); \ + r_l = __lsx_vpackev_h(temp1, temp0); \ + r_h = __lsx_vpackev_h(temp3, temp2); \ + DUP2_ARG2(__lsx_vdp2_w_h, in_uvl, ugvg, in_uvh, ugvg, u_l, u_h); \ + temp0 = __lsx_vsub_w(yl_ev, u_l); \ + temp1 = __lsx_vsub_w(yl_od, u_l); \ + temp2 = __lsx_vsub_w(yh_ev, u_h); \ + temp3 = __lsx_vsub_w(yh_od, u_h); \ + 
DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \ + temp1, temp2, temp3); \ + DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \ + temp2, temp3); \ + g_l = __lsx_vpackev_h(temp1, temp0); \ + g_h = __lsx_vpackev_h(temp3, temp2); \ + } + // Convert 8 pixels of YUV420 to RGB. #define YUVTORGB(in_y, in_vu, vrub, vgug, yg, yb, out_b, out_g, out_r) \ { \ @@ -118,6 +203,25 @@ extern "C" { out_g = __lsx_vpackev_h(tmp1, tmp0); \ } +// Pack and Store 16 ARGB values. +#define STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, pdst_argb) \ + { \ + __m128i temp0, temp1, temp2, temp3; \ + temp0 = __lsx_vpackev_b(g_l, b_l); \ + temp1 = __lsx_vpackev_b(a_l, r_l); \ + temp2 = __lsx_vpackev_b(g_h, b_h); \ + temp3 = __lsx_vpackev_b(a_h, r_h); \ + r_l = __lsx_vilvl_h(temp1, temp0); \ + r_h = __lsx_vilvh_h(temp1, temp0); \ + g_l = __lsx_vilvl_h(temp3, temp2); \ + g_h = __lsx_vilvh_h(temp3, temp2); \ + __lsx_vst(r_l, pdst_argb, 0); \ + __lsx_vst(r_h, pdst_argb, 16); \ + __lsx_vst(g_l, pdst_argb, 32); \ + __lsx_vst(g_h, pdst_argb, 48); \ + pdst_argb += 64; \ + } + // Pack and Store 8 ARGB values. 
#define STOREARGB(in_a, in_r, in_g, in_b, pdst_argb) \ { \ @@ -155,6 +259,1028 @@ extern "C" { _dst0 = __lsx_vpickod_b(_reg1, _reg0); \ } +void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) { + int x; + int len = width / 32; + __m128i src0, src1; + __m128i shuffler = {0x08090A0B0C0D0E0F, 0x0001020304050607}; + src += width - 32; + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1); + DUP2_ARG3(__lsx_vshuf_b, src0, src0, shuffler, src1, src1, shuffler, src0, + src1); + __lsx_vst(src1, dst, 0); + __lsx_vst(src0, dst, 16); + dst += 32; + src -= 32; + } +} + +void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width) { + int x; + int len = width / 8; + __m128i src, dst; + __m128i shuffler = {0x0004000500060007, 0x0000000100020003}; + + src_uv += (width - 8) << 1; + for (x = 0; x < len; x++) { + src = __lsx_vld(src_uv, 0); + dst = __lsx_vshuf_h(shuffler, src, src); + __lsx_vst(dst, dst_uv, 0); + src_uv -= 16; + dst_uv += 16; + } +} + +void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) { + int x; + int len = width / 8; + __m128i src0, src1; + __m128i shuffler = {0x0B0A09080F0E0D0C, 0x0302010007060504}; + + src += (width * 4) - 32; + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1); + DUP2_ARG3(__lsx_vshuf_b, src0, src0, shuffler, src1, src1, shuffler, src0, + src1); + __lsx_vst(src1, dst, 0); + __lsx_vst(src0, dst, 16); + dst += 32; + src -= 32; + } +} + +void I422ToYUY2Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + int x; + int len = width / 16; + __m128i src_u0, src_v0, src_y0, vec_uv0; + __m128i vec_yuy2_0, vec_yuy2_1; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_u, 0, src_v, 0, src_u0, src_v0); + src_y0 = __lsx_vld(src_y, 0); + vec_uv0 = __lsx_vilvl_b(src_v0, src_u0); + vec_yuy2_0 = __lsx_vilvl_b(vec_uv0, src_y0); + vec_yuy2_1 = __lsx_vilvh_b(vec_uv0, src_y0); + __lsx_vst(vec_yuy2_0, 
dst_yuy2, 0); + __lsx_vst(vec_yuy2_1, dst_yuy2, 16); + src_u += 8; + src_v += 8; + src_y += 16; + dst_yuy2 += 32; + } +} + +void I422ToUYVYRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + int x; + int len = width / 16; + __m128i src_u0, src_v0, src_y0, vec_uv0; + __m128i vec_uyvy0, vec_uyvy1; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_u, 0, src_v, 0, src_u0, src_v0); + src_y0 = __lsx_vld(src_y, 0); + vec_uv0 = __lsx_vilvl_b(src_v0, src_u0); + vec_uyvy0 = __lsx_vilvl_b(src_y0, vec_uv0); + vec_uyvy1 = __lsx_vilvh_b(src_y0, vec_uv0); + __lsx_vst(vec_uyvy0, dst_uyvy, 0); + __lsx_vst(vec_uyvy1, dst_uyvy, 16); + src_u += 8; + src_v += 8; + src_y += 16; + dst_uyvy += 32; + } +} + +void I422ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 16; + __m128i vec_yb, vec_yg, vec_ub, vec_ug, vec_vr, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i alpha = __lsx_vldi(0xFF); + __m128i const_80 = __lsx_vldi(0x80); + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + STOREARGB_D(alpha, alpha, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb); + src_y += 16; + src_u += 8; + src_v += 8; + } +} + +void I422ToRGBARow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 16; + __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i alpha = __lsx_vldi(0xFF); + 
__m128i const_80 = __lsx_vldi(0x80); + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + STOREARGB_D(r_l, r_h, g_l, g_h, b_l, b_h, alpha, alpha, dst_argb); + src_y += 16; + src_u += 8; + src_v += 8; + } +} + +void I422AlphaToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 16; + int res = width & 15; + __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i zero = __lsx_vldi(0); + __m128i const_80 = __lsx_vldi(0x80); + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h, a_l, a_h; + + y = __lsx_vld(src_a, 0); + a_l = __lsx_vilvl_b(zero, y); + a_h = __lsx_vilvh_b(zero, y); + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb); + src_y += 16; + src_u += 8; + src_v += 8; + src_a += 16; + } + if (res) { + __m128i y, uv, r, g, b, a; + a = __lsx_vld(src_a, 0); + a = __lsx_vsllwil_hu_bu(a, 0); + READYUV422(src_y, src_u, src_v, y, uv); + YUVTORGB(y, uv, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b, g, r); + STOREARGB(a, r, g, b, dst_argb); + } +} + +void I422ToRGB24Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const 
struct YuvConstants* yuvconstants, + int32_t width) { + int x; + int len = width / 16; + __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i const_80 = __lsx_vldi(0x80); + __m128i shuffler0 = {0x0504120302100100, 0x0A18090816070614}; + __m128i shuffler1 = {0x1E0F0E1C0D0C1A0B, 0x1E0F0E1C0D0C1A0B}; + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + __m128i temp0, temp1, temp2, temp3; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + temp0 = __lsx_vpackev_b(g_l, b_l); + temp1 = __lsx_vpackev_b(g_h, b_h); + DUP4_ARG3(__lsx_vshuf_b, r_l, temp0, shuffler1, r_h, temp1, shuffler1, r_l, + temp0, shuffler0, r_h, temp1, shuffler0, temp2, temp3, temp0, + temp1); + + b_l = __lsx_vilvl_d(temp1, temp2); + b_h = __lsx_vilvh_d(temp3, temp1); + __lsx_vst(temp0, dst_argb, 0); + __lsx_vst(b_l, dst_argb, 16); + __lsx_vst(b_h, dst_argb, 32); + dst_argb += 48; + src_y += 16; + src_u += 8; + src_v += 8; + } +} + +// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. 
+void I422ToRGB565Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 16; + __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i const_80 = __lsx_vldi(0x80); + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + b_l = __lsx_vsrli_h(b_l, 3); + b_h = __lsx_vsrli_h(b_h, 3); + g_l = __lsx_vsrli_h(g_l, 2); + g_h = __lsx_vsrli_h(g_h, 2); + r_l = __lsx_vsrli_h(r_l, 3); + r_h = __lsx_vsrli_h(r_h, 3); + r_l = __lsx_vslli_h(r_l, 11); + r_h = __lsx_vslli_h(r_h, 11); + g_l = __lsx_vslli_h(g_l, 5); + g_h = __lsx_vslli_h(g_h, 5); + r_l = __lsx_vor_v(r_l, g_l); + r_l = __lsx_vor_v(r_l, b_l); + r_h = __lsx_vor_v(r_h, g_h); + r_h = __lsx_vor_v(r_h, b_h); + __lsx_vst(r_l, dst_rgb565, 0); + __lsx_vst(r_h, dst_rgb565, 16); + dst_rgb565 += 32; + src_y += 16; + src_u += 8; + src_v += 8; + } +} + +// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G. 
+void I422ToARGB4444Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 16; + __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i const_80 = __lsx_vldi(0x80); + __m128i alpha = (__m128i)v2u64{0xF000F000F000F000, 0xF000F000F000F000}; + __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0}; + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + b_l = __lsx_vsrli_h(b_l, 4); + b_h = __lsx_vsrli_h(b_h, 4); + r_l = __lsx_vsrli_h(r_l, 4); + r_h = __lsx_vsrli_h(r_h, 4); + g_l = __lsx_vand_v(g_l, mask); + g_h = __lsx_vand_v(g_h, mask); + r_l = __lsx_vslli_h(r_l, 8); + r_h = __lsx_vslli_h(r_h, 8); + r_l = __lsx_vor_v(r_l, alpha); + r_h = __lsx_vor_v(r_h, alpha); + r_l = __lsx_vor_v(r_l, g_l); + r_h = __lsx_vor_v(r_h, g_h); + r_l = __lsx_vor_v(r_l, b_l); + r_h = __lsx_vor_v(r_h, b_h); + __lsx_vst(r_l, dst_argb4444, 0); + __lsx_vst(r_h, dst_argb4444, 16); + dst_argb4444 += 32; + src_y += 16; + src_u += 8; + src_v += 8; + } +} + +void I422ToARGB1555Row_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int len = width / 16; + __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m128i vec_ubvr, vec_ugvg; + __m128i const_80 = __lsx_vldi(0x80); + __m128i alpha = (__m128i)v2u64{0x8000800080008000, 0x8000800080008000}; + + YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb); + vec_ubvr = 
__lsx_vilvl_h(vec_ub, vec_vr); + vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg); + + for (x = 0; x < len; x++) { + __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h; + + READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h); + YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l, + g_h, r_l, r_h); + b_l = __lsx_vsrli_h(b_l, 3); + b_h = __lsx_vsrli_h(b_h, 3); + g_l = __lsx_vsrli_h(g_l, 3); + + g_h = __lsx_vsrli_h(g_h, 3); + g_l = __lsx_vslli_h(g_l, 5); + g_h = __lsx_vslli_h(g_h, 5); + r_l = __lsx_vsrli_h(r_l, 3); + r_h = __lsx_vsrli_h(r_h, 3); + r_l = __lsx_vslli_h(r_l, 10); + r_h = __lsx_vslli_h(r_h, 10); + r_l = __lsx_vor_v(r_l, alpha); + r_h = __lsx_vor_v(r_h, alpha); + r_l = __lsx_vor_v(r_l, g_l); + r_h = __lsx_vor_v(r_h, g_h); + r_l = __lsx_vor_v(r_l, b_l); + r_h = __lsx_vor_v(r_h, b_h); + __lsx_vst(r_l, dst_argb1555, 0); + __lsx_vst(r_h, dst_argb1555, 16); + dst_argb1555 += 32; + src_y += 16; + src_u += 8; + src_v += 8; + } +} + +void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + int x; + int len = width / 16; + __m128i src0, src1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src0, src1); + dst0 = __lsx_vpickev_b(src1, src0); + __lsx_vst(dst0, dst_y, 0); + src_yuy2 += 32; + dst_y += 16; + } +} + +void YUY2ToUVRow_LSX(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2; + int x; + int len = width / 16; + __m128i src0, src1, src2, src3; + __m128i tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src_yuy2_next, 0, + src_yuy2_next, 16, src0, src1, src2, src3); + src0 = __lsx_vpickod_b(src1, src0); + src1 = __lsx_vpickod_b(src3, src2); + tmp0 = __lsx_vavgr_bu(src1, src0); + dst0 = __lsx_vpickev_b(tmp0, tmp0); + dst1 = __lsx_vpickod_b(tmp0, tmp0); + __lsx_vstelm_d(dst0, dst_u, 0, 0); + __lsx_vstelm_d(dst1, dst_v, 0, 0); + src_yuy2 
+= 32; + src_yuy2_next += 32; + dst_u += 8; + dst_v += 8; + } +} + +void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + int len = width / 16; + __m128i src0, src1, tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src0, src1); + tmp0 = __lsx_vpickod_b(src1, src0); + dst0 = __lsx_vpickev_b(tmp0, tmp0); + dst1 = __lsx_vpickod_b(tmp0, tmp0); + __lsx_vstelm_d(dst0, dst_u, 0, 0); + __lsx_vstelm_d(dst1, dst_v, 0, 0); + src_yuy2 += 32; + dst_u += 8; + dst_v += 8; + } +} + +void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + int x; + int len = width / 16; + __m128i src0, src1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src0, src1); + dst0 = __lsx_vpickod_b(src1, src0); + __lsx_vst(dst0, dst_y, 0); + src_uyvy += 32; + dst_y += 16; + } +} + +void UYVYToUVRow_LSX(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy; + int x; + int len = width / 16; + __m128i src0, src1, src2, src3, tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src_uyvy_next, 0, + src_uyvy_next, 16, src0, src1, src2, src3); + src0 = __lsx_vpickev_b(src1, src0); + src1 = __lsx_vpickev_b(src3, src2); + tmp0 = __lsx_vavgr_bu(src1, src0); + dst0 = __lsx_vpickev_b(tmp0, tmp0); + dst1 = __lsx_vpickod_b(tmp0, tmp0); + __lsx_vstelm_d(dst0, dst_u, 0, 0); + __lsx_vstelm_d(dst1, dst_v, 0, 0); + src_uyvy += 32; + src_uyvy_next += 32; + dst_u += 8; + dst_v += 8; + } +} + +void UYVYToUV422Row_LSX(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + int len = width / 16; + __m128i src0, src1, tmp0, dst0, dst1; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + dst0 = 
__lsx_vpickev_b(tmp0, tmp0); + dst1 = __lsx_vpickod_b(tmp0, tmp0); + __lsx_vstelm_d(dst0, dst_u, 0, 0); + __lsx_vstelm_d(dst1, dst_v, 0, 0); + src_uyvy += 32; + dst_u += 8; + dst_v += 8; + } +} + +void ARGBToUVRow_LSX(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + int len = width / 16; + const uint8_t* src_argb1 = src_argb0 + src_stride_argb; + + __m128i src0, src1, src2, src3, src4, src5, src6, src7; + __m128i vec0, vec1, vec2, vec3; + __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, dst0, dst1; + __m128i const_0x70 = {0x0038003800380038, 0x0038003800380038}; + __m128i const_0x4A = {0x0025002500250025, 0x0025002500250025}; + __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013}; + __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f}; + __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009}; + __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0, + 48, src0, src1, src2, src3); + DUP4_ARG2(__lsx_vld, src_argb1, 0, src_argb1, 16, src_argb1, 32, src_argb1, + 48, src4, src5, src6, src7); + vec0 = __lsx_vaddwev_h_bu(src0, src4); + vec1 = __lsx_vaddwev_h_bu(src1, src5); + vec2 = __lsx_vaddwev_h_bu(src2, src6); + vec3 = __lsx_vaddwev_h_bu(src3, src7); + tmp0 = __lsx_vpickev_h(vec1, vec0); + tmp1 = __lsx_vpickev_h(vec3, vec2); + tmp2 = __lsx_vpickod_h(vec1, vec0); + tmp3 = __lsx_vpickod_h(vec3, vec2); + vec0 = __lsx_vaddwod_h_bu(src0, src4); + vec1 = __lsx_vaddwod_h_bu(src1, src5); + vec2 = __lsx_vaddwod_h_bu(src2, src6); + vec3 = __lsx_vaddwod_h_bu(src3, src7); + tmp4 = __lsx_vpickev_h(vec1, vec0); + tmp5 = __lsx_vpickev_h(vec3, vec2); + vec0 = __lsx_vpickev_h(tmp1, tmp0); + vec1 = __lsx_vpickod_h(tmp1, tmp0); + src0 = __lsx_vavgr_h(vec0, vec1); + vec0 = __lsx_vpickev_h(tmp3, tmp2); + vec1 = __lsx_vpickod_h(tmp3, tmp2); + src1 = __lsx_vavgr_h(vec0, vec1); + 
vec0 = __lsx_vpickev_h(tmp5, tmp4); + vec1 = __lsx_vpickod_h(tmp5, tmp4); + src2 = __lsx_vavgr_h(vec0, vec1); + dst0 = __lsx_vmadd_h(const_0x8080, src0, const_0x70); + dst0 = __lsx_vmsub_h(dst0, src2, const_0x4A); + dst0 = __lsx_vmsub_h(dst0, src1, const_0x26); + dst1 = __lsx_vmadd_h(const_0x8080, src1, const_0x70); + dst1 = __lsx_vmsub_h(dst1, src2, const_0x5E); + dst1 = __lsx_vmsub_h(dst1, src0, const_0x12); + dst0 = __lsx_vsrai_h(dst0, 8); + dst1 = __lsx_vsrai_h(dst1, 8); + dst0 = __lsx_vpickev_b(dst1, dst0); + __lsx_vstelm_d(dst0, dst_u, 0, 0); + __lsx_vstelm_d(dst0, dst_v, 0, 1); + src_argb0 += 64; + src_argb1 += 64; + dst_u += 8; + dst_v += 8; + } +} + +void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { + int x; + int len = (width / 16) - 1; + __m128i src0, src1, src2, src3; + __m128i tmp0, tmp1, tmp2, tmp3; + __m128i shuf = {0x0908060504020100, 0x000000000E0D0C0A}; + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); + tmp0 = __lsx_vshuf_b(src0, src0, shuf); + tmp1 = __lsx_vshuf_b(src1, src1, shuf); + tmp2 = __lsx_vshuf_b(src2, src2, shuf); + tmp3 = __lsx_vshuf_b(src3, src3, shuf); + __lsx_vst(tmp0, dst_rgb, 0); + __lsx_vst(tmp1, dst_rgb, 12); + __lsx_vst(tmp2, dst_rgb, 24); + __lsx_vst(tmp3, dst_rgb, 36); + dst_rgb += 48; + src_argb += 64; + } + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); + tmp0 = __lsx_vshuf_b(src0, src0, shuf); + tmp1 = __lsx_vshuf_b(src1, src1, shuf); + tmp2 = __lsx_vshuf_b(src2, src2, shuf); + tmp3 = __lsx_vshuf_b(src3, src3, shuf); + __lsx_vst(tmp0, dst_rgb, 0); + __lsx_vst(tmp1, dst_rgb, 12); + __lsx_vst(tmp2, dst_rgb, 24); + dst_rgb += 36; + __lsx_vst(tmp3, dst_rgb, 0); +} + +void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { + int x; + int len = (width / 16) - 1; + __m128i src0, src1, src2, src3; + __m128i tmp0, tmp1, tmp2, tmp3; + __m128i 
shuf = {0x090A040506000102, 0x000000000C0D0E08}; + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); + tmp0 = __lsx_vshuf_b(src0, src0, shuf); + tmp1 = __lsx_vshuf_b(src1, src1, shuf); + tmp2 = __lsx_vshuf_b(src2, src2, shuf); + tmp3 = __lsx_vshuf_b(src3, src3, shuf); + __lsx_vst(tmp0, dst_rgb, 0); + __lsx_vst(tmp1, dst_rgb, 12); + __lsx_vst(tmp2, dst_rgb, 24); + __lsx_vst(tmp3, dst_rgb, 36); + dst_rgb += 48; + src_argb += 64; + } + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); + tmp0 = __lsx_vshuf_b(src0, src0, shuf); + tmp1 = __lsx_vshuf_b(src1, src1, shuf); + tmp2 = __lsx_vshuf_b(src2, src2, shuf); + tmp3 = __lsx_vshuf_b(src3, src3, shuf); + __lsx_vst(tmp0, dst_rgb, 0); + __lsx_vst(tmp1, dst_rgb, 12); + __lsx_vst(tmp2, dst_rgb, 24); + dst_rgb += 36; + __lsx_vst(tmp3, dst_rgb, 0); +} + +void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { + int x; + int len = width / 8; + __m128i zero = __lsx_vldi(0); + __m128i src0, src1, tmp0, tmp1, dst0; + __m128i shift = {0x0300030003000300, 0x0300030003000300}; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + tmp0 = __lsx_vsrli_b(tmp0, 3); + tmp1 = __lsx_vpackev_b(zero, tmp1); + tmp1 = __lsx_vsrli_h(tmp1, 2); + tmp0 = __lsx_vsll_b(tmp0, shift); + tmp1 = __lsx_vslli_h(tmp1, 5); + dst0 = __lsx_vor_v(tmp0, tmp1); + __lsx_vst(dst0, dst_rgb, 0); + dst_rgb += 16; + src_argb += 32; + } +} + +void ARGBToARGB1555Row_LSX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { + int x; + int len = width / 8; + __m128i zero = __lsx_vldi(0); + __m128i src0, src1, tmp0, tmp1, tmp2, tmp3, dst0; + __m128i shift1 = {0x0703070307030703, 0x0703070307030703}; + __m128i shift2 = {0x0200020002000200, 0x0200020002000200}; + + for (x = 0; x < len; x++) { + 
DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + tmp0 = __lsx_vsrli_b(tmp0, 3); + tmp1 = __lsx_vsrl_b(tmp1, shift1); + tmp0 = __lsx_vsll_b(tmp0, shift2); + tmp2 = __lsx_vpackev_b(zero, tmp1); + tmp3 = __lsx_vpackod_b(zero, tmp1); + tmp2 = __lsx_vslli_h(tmp2, 5); + tmp3 = __lsx_vslli_h(tmp3, 15); + dst0 = __lsx_vor_v(tmp0, tmp2); + dst0 = __lsx_vor_v(dst0, tmp3); + __lsx_vst(dst0, dst_rgb, 0); + dst_rgb += 16; + src_argb += 32; + } +} + +void ARGBToARGB4444Row_LSX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { + int x; + int len = width / 8; + __m128i src0, src1, tmp0, tmp1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + tmp1 = __lsx_vandi_b(tmp1, 0xF0); + tmp0 = __lsx_vsrli_b(tmp0, 4); + dst0 = __lsx_vor_v(tmp1, tmp0); + __lsx_vst(dst0, dst_rgb, 0); + dst_rgb += 16; + src_argb += 32; + } +} + +void ARGBToUV444Row_LSX(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int32_t width) { + int x; + int len = width / 16; + __m128i src0, src1, src2, src3; + __m128i tmp0, tmp1, tmp2, tmp3; + __m128i reg0, reg1, reg2, reg3, dst0, dst1; + __m128i const_112 = __lsx_vldi(112); + __m128i const_74 = __lsx_vldi(74); + __m128i const_38 = __lsx_vldi(38); + __m128i const_94 = __lsx_vldi(94); + __m128i const_18 = __lsx_vldi(18); + __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); + tmp0 = __lsx_vpickev_h(src1, src0); + tmp1 = __lsx_vpickod_h(src1, src0); + tmp2 = __lsx_vpickev_h(src3, src2); + tmp3 = __lsx_vpickod_h(src3, src2); + reg0 = __lsx_vmaddwev_h_bu(const_0x8080, tmp0, const_112); + reg1 = __lsx_vmaddwev_h_bu(const_0x8080, tmp2, const_112); + reg2 = 
__lsx_vmulwod_h_bu(tmp0, const_74); + reg3 = __lsx_vmulwod_h_bu(tmp2, const_74); + reg2 = __lsx_vmaddwev_h_bu(reg2, tmp1, const_38); + reg3 = __lsx_vmaddwev_h_bu(reg3, tmp3, const_38); + reg0 = __lsx_vsub_h(reg0, reg2); + reg1 = __lsx_vsub_h(reg1, reg3); + reg0 = __lsx_vsrai_h(reg0, 8); + reg1 = __lsx_vsrai_h(reg1, 8); + dst0 = __lsx_vpickev_b(reg1, reg0); + + reg0 = __lsx_vmaddwev_h_bu(const_0x8080, tmp1, const_112); + reg1 = __lsx_vmaddwev_h_bu(const_0x8080, tmp3, const_112); + reg2 = __lsx_vmulwev_h_bu(tmp0, const_18); + reg3 = __lsx_vmulwev_h_bu(tmp2, const_18); + reg2 = __lsx_vmaddwod_h_bu(reg2, tmp0, const_94); + reg3 = __lsx_vmaddwod_h_bu(reg3, tmp2, const_94); + reg0 = __lsx_vsub_h(reg0, reg2); + reg1 = __lsx_vsub_h(reg1, reg3); + reg0 = __lsx_vsrai_h(reg0, 8); + reg1 = __lsx_vsrai_h(reg1, 8); + dst1 = __lsx_vpickev_b(reg1, reg0); + + __lsx_vst(dst0, dst_u, 0); + __lsx_vst(dst1, dst_v, 0); + dst_u += 16; + dst_v += 16; + src_argb += 64; + } +} + +void ARGBMultiplyRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + int x; + int len = width / 4; + __m128i zero = __lsx_vldi(0); + __m128i src0, src1, dst0, dst1; + __m128i tmp0, tmp1, tmp2, tmp3; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1); + tmp0 = __lsx_vilvl_b(src0, src0); + tmp1 = __lsx_vilvh_b(src0, src0); + tmp2 = __lsx_vilvl_b(zero, src1); + tmp3 = __lsx_vilvh_b(zero, src1); + dst0 = __lsx_vmuh_hu(tmp0, tmp2); + dst1 = __lsx_vmuh_hu(tmp1, tmp3); + dst0 = __lsx_vpickev_b(dst1, dst0); + __lsx_vst(dst0, dst_argb, 0); + src_argb0 += 16; + src_argb1 += 16; + dst_argb += 16; + } +} + +void ARGBAddRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + int x; + int len = width / 4; + __m128i src0, src1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1); + dst0 = __lsx_vsadd_bu(src0, src1); + __lsx_vst(dst0, dst_argb, 0); 
+ src_argb0 += 16; + src_argb1 += 16; + dst_argb += 16; + } +} + +void ARGBSubtractRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + int x; + int len = width / 4; + __m128i src0, src1, dst0; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1); + dst0 = __lsx_vssub_bu(src0, src1); + __lsx_vst(dst0, dst_argb, 0); + src_argb0 += 16; + src_argb1 += 16; + dst_argb += 16; + } +} + +void ARGBAttenuateRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + int x; + int len = width / 8; + __m128i src0, src1, tmp0, tmp1; + __m128i reg0, reg1, reg2, reg3, reg4, reg5; + __m128i b, g, r, a, dst0, dst1; + __m128i control = {0x0005000100040000, 0x0007000300060002}; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + b = __lsx_vpackev_b(tmp0, tmp0); + r = __lsx_vpackod_b(tmp0, tmp0); + g = __lsx_vpackev_b(tmp1, tmp1); + a = __lsx_vpackod_b(tmp1, tmp1); + reg0 = __lsx_vmulwev_w_hu(b, a); + reg1 = __lsx_vmulwod_w_hu(b, a); + reg2 = __lsx_vmulwev_w_hu(r, a); + reg3 = __lsx_vmulwod_w_hu(r, a); + reg4 = __lsx_vmulwev_w_hu(g, a); + reg5 = __lsx_vmulwod_w_hu(g, a); + reg0 = __lsx_vssrani_h_w(reg1, reg0, 24); + reg2 = __lsx_vssrani_h_w(reg3, reg2, 24); + reg4 = __lsx_vssrani_h_w(reg5, reg4, 24); + reg0 = __lsx_vshuf_h(control, reg0, reg0); + reg2 = __lsx_vshuf_h(control, reg2, reg2); + reg4 = __lsx_vshuf_h(control, reg4, reg4); + tmp0 = __lsx_vpackev_b(reg4, reg0); + tmp1 = __lsx_vpackev_b(a, reg2); + dst0 = __lsx_vilvl_h(tmp1, tmp0); + dst1 = __lsx_vilvh_h(tmp1, tmp0); + __lsx_vst(dst0, dst_argb, 0); + __lsx_vst(dst1, dst_argb, 16); + dst_argb += 32; + src_argb += 32; + } +} + +void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb, + uint8_t* dst_rgb, + uint32_t dither4, + int width) { + int x; + int len = width / 8; + __m128i src0, src1, tmp0, tmp1, dst0; + 
__m128i b, g, r; + __m128i zero = __lsx_vldi(0); + __m128i vec_dither = __lsx_vldrepl_w(&dither4, 0); + + vec_dither = __lsx_vilvl_b(zero, vec_dither); + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + b = __lsx_vpackev_b(zero, tmp0); + r = __lsx_vpackod_b(zero, tmp0); + g = __lsx_vpackev_b(zero, tmp1); + b = __lsx_vadd_h(b, vec_dither); + g = __lsx_vadd_h(g, vec_dither); + r = __lsx_vadd_h(r, vec_dither); + DUP2_ARG1(__lsx_vclip255_h, b, g, b, g); + r = __lsx_vclip255_h(r); + b = __lsx_vsrai_h(b, 3); + g = __lsx_vsrai_h(g, 2); + r = __lsx_vsrai_h(r, 3); + g = __lsx_vslli_h(g, 5); + r = __lsx_vslli_h(r, 11); + dst0 = __lsx_vor_v(b, g); + dst0 = __lsx_vor_v(dst0, r); + __lsx_vst(dst0, dst_rgb, 0); + src_argb += 32; + dst_rgb += 16; + } +} + +void ARGBShuffleRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { + int x; + int len = width / 8; + __m128i src0, src1, dst0, dst1; + __m128i shuf = {0x0404040400000000, 0x0C0C0C0C08080808}; + __m128i temp = __lsx_vldrepl_w(shuffler, 0); + + shuf = __lsx_vadd_b(shuf, temp); + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + dst0 = __lsx_vshuf_b(src0, src0, shuf); + dst1 = __lsx_vshuf_b(src1, src1, shuf); + __lsx_vst(dst0, dst_argb, 0); + __lsx_vst(dst1, dst_argb, 16); + src_argb += 32; + dst_argb += 32; + } +} + +void ARGBShadeRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value) { + int x; + int len = width / 4; + __m128i src0, dst0, tmp0, tmp1; + __m128i vec_value = __lsx_vreplgr2vr_w(value); + + vec_value = __lsx_vilvl_b(vec_value, vec_value); + for (x = 0; x < len; x++) { + src0 = __lsx_vld(src_argb, 0); + tmp0 = __lsx_vilvl_b(src0, src0); + tmp1 = __lsx_vilvh_b(src0, src0); + tmp0 = __lsx_vmuh_hu(tmp0, vec_value); + tmp1 = __lsx_vmuh_hu(tmp1, vec_value); + dst0 = __lsx_vpickod_b(tmp1, 
tmp0); + __lsx_vst(dst0, dst_argb, 0); + src_argb += 16; + dst_argb += 16; + } +} + +void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width) { + int x; + int len = width / 8; + __m128i src0, src1, tmp0, tmp1; + __m128i reg0, reg1, reg2, dst0, dst1; + __m128i const_128 = __lsx_vldi(0x480); + __m128i const_150 = __lsx_vldi(0x96); + __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D}; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + reg0 = __lsx_vdp2_h_bu(tmp0, const_br); + reg1 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150); + reg2 = __lsx_vadd_h(reg0, reg1); + tmp0 = __lsx_vpackod_b(reg2, reg2); + tmp1 = __lsx_vpackod_b(tmp1, reg2); + dst0 = __lsx_vilvl_h(tmp1, tmp0); + dst1 = __lsx_vilvh_h(tmp1, tmp0); + __lsx_vst(dst0, dst_argb, 0); + __lsx_vst(dst1, dst_argb, 16); + src_argb += 32; + dst_argb += 32; + } +} + +void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width) { + int x; + int len = width / 8; + __m128i src0, src1, tmp0, tmp1; + __m128i reg0, reg1, spb, spg, spr; + __m128i dst0, dst1; + __m128i spb_g = __lsx_vldi(68); + __m128i spg_g = __lsx_vldi(88); + __m128i spr_g = __lsx_vldi(98); + __m128i spb_br = {0x2311231123112311, 0x2311231123112311}; + __m128i spg_br = {0x2D162D162D162D16, 0x2D162D162D162D16}; + __m128i spr_br = {0x3218321832183218, 0x3218321832183218}; + __m128i shuff = {0x1706150413021100, 0x1F0E1D0C1B0A1908}; + + for (x = 0; x < len; x++) { + DUP2_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, src0, src1); + tmp0 = __lsx_vpickev_b(src1, src0); + tmp1 = __lsx_vpickod_b(src1, src0); + DUP2_ARG2(__lsx_vdp2_h_bu, tmp0, spb_br, tmp0, spg_br, spb, spg); + spr = __lsx_vdp2_h_bu(tmp0, spr_br); + spb = __lsx_vmaddwev_h_bu(spb, tmp1, spb_g); + spg = __lsx_vmaddwev_h_bu(spg, tmp1, spg_g); + spr = __lsx_vmaddwev_h_bu(spr, tmp1, spr_g); + spb = __lsx_vsrli_h(spb, 7); + spg = __lsx_vsrli_h(spg, 7); + spr = 
__lsx_vsrli_h(spr, 7); + spg = __lsx_vsat_hu(spg, 7); + spr = __lsx_vsat_hu(spr, 7); + reg0 = __lsx_vpackev_b(spg, spb); + reg1 = __lsx_vshuf_b(tmp1, spr, shuff); + dst0 = __lsx_vilvl_h(reg1, reg0); + dst1 = __lsx_vilvh_h(reg1, reg0); + __lsx_vst(dst0, dst_argb, 0); + __lsx_vst(dst1, dst_argb, 16); + dst_argb += 32; + } +} + void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { @@ -407,7 +1533,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, @@ -516,7 +1642,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, @@ -561,39 +1687,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, } } -void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { - int x; - int len = width / 16; - __m128i src0, src1, src2; - __m128i tmp0, tmp1, tmp2, tmp3; - __m128i reg0, reg1, dst0; - __m128i const_129 = __lsx_vldi(129); - __m128i const_br = {0x4219421942194219, 0x4219421942194219}; - __m128i const_1080 = {0x1080108010801080, 0x1080108010801080}; - __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C}; - __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604}; - __m128i shuff2 = {0x000A000700040001, 0x001600130010000D}; - __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005}; - - for (x = 0; x < len; x++) { - 
src0 = __lsx_vld(src_rgb24, 0); - src1 = __lsx_vld(src_rgb24, 16); - src2 = __lsx_vld(src_rgb24, 32); - tmp0 = __lsx_vshuf_b(src1, src0, shuff0); - tmp1 = __lsx_vshuf_b(src1, src2, shuff1); - tmp2 = __lsx_vshuf_b(src1, src0, shuff2); - tmp3 = __lsx_vshuf_b(src1, src2, shuff3); - reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129); - reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129); - reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1); - dst0 = __lsx_vpickod_b(reg1, reg0); - __lsx_vst(dst0, dst_y, 0); - dst_y += 16; - src_rgb24 += 48; - } -} - void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, @@ -610,7 +1703,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; @@ -647,39 +1740,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, } } -void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) { - int x; - int len = width / 16; - __m128i src0, src1, src2; - __m128i tmp0, tmp1, tmp2, tmp3; - __m128i reg0, reg1, dst0; - __m128i const_129 = __lsx_vldi(129); - __m128i const_br = {0x1942194219421942, 0x1942194219421942}; - __m128i const_1080 = {0x1080108010801080, 0x1080108010801080}; - __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C}; - __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604}; - __m128i shuff2 = {0x000A000700040001, 0x001600130010000D}; - __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005}; - - for (x = 0; x < len; x++) { - src0 = __lsx_vld(src_raw, 0); - src1 = __lsx_vld(src_raw, 16); - src2 = 
__lsx_vld(src_raw, 32); - tmp0 = __lsx_vshuf_b(src1, src0, shuff0); - tmp1 = __lsx_vshuf_b(src1, src2, shuff1); - tmp2 = __lsx_vshuf_b(src1, src0, shuff2); - tmp3 = __lsx_vshuf_b(src1, src2, shuff3); - reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129); - reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129); - reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1); - dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8); - __lsx_vst(dst0, dst_y, 0); - dst_y += 16; - src_raw += 48; - } -} - void RAWToUVRow_LSX(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, @@ -696,7 +1756,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; @@ -914,62 +1974,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx, } } -void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - int len = width / 16; - __m128i src0, src1, src2, src3, dst0; - __m128i tmp0, tmp1, tmp2, tmp3; - __m128i reg0, reg1; - __m128i const_128 = __lsx_vldi(0x480); - __m128i const_150 = __lsx_vldi(0x96); - __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D}; - - for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, - src0, src1, src2, src3); - tmp0 = __lsx_vpickev_b(src1, src0); - tmp1 = __lsx_vpickod_b(src1, src0); - tmp2 = __lsx_vpickev_b(src3, src2); - tmp3 = __lsx_vpickod_b(src3, src2); - reg0 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150); - reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150); - reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0); - 
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2); - dst0 = __lsx_vpickod_b(reg1, reg0); - __lsx_vst(dst0, dst_y, 0); - dst_y += 16; - src_argb += 64; - } -} - -void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - int x; - int len = width / 16; - __m128i src0, src1, src2, src3, dst0; - __m128i tmp0, tmp1, tmp2, tmp3; - __m128i reg0, reg1; - __m128i const_129 = __lsx_vldi(0x81); - __m128i const_br = {0x1942194219421942, 0x1942194219421942}; - __m128i const_1080 = {0x1080108010801080, 0x1080108010801080}; - - for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48, - src0, src1, src2, src3); - tmp0 = __lsx_vpickod_b(src1, src0); - tmp1 = __lsx_vpickev_b(src1, src0); - tmp2 = __lsx_vpickod_b(src3, src2); - tmp3 = __lsx_vpickev_b(src3, src2); - reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129); - reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129); - reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2); - dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8); - __lsx_vst(dst0, dst_y, 0); - dst_y += 16; - src_bgra += 64; - } -} - void BGRAToUVRow_LSX(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, @@ -987,7 +1991,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48, @@ -1018,34 +2022,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, } } -void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) { - int x; - int len = width / 16; - __m128i src0, src1, src2, src3, dst0; - __m128i tmp0, tmp1, tmp2, tmp3; - __m128i reg0, reg1; - __m128i const_129 = __lsx_vldi(0x81); - __m128i const_br 
= {0x1942194219421942, 0x1942194219421942}; - __m128i const_1080 = {0x1080108010801080, 0x1080108010801080}; - - for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, - src0, src1, src2, src3); - tmp0 = __lsx_vpickev_b(src1, src0); - tmp1 = __lsx_vpickod_b(src1, src0); - tmp2 = __lsx_vpickev_b(src3, src2); - tmp3 = __lsx_vpickod_b(src3, src2); - reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp1, const_129); - reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129); - reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2); - dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8); - __lsx_vst(dst0, dst_y, 0); - dst_y += 16; - src_abgr += 64; - } -} - void ABGRToUVRow_LSX(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_u, @@ -1063,7 +2039,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, @@ -1094,34 +2070,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, } } -void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - int x; - int len = width / 16; - __m128i src0, src1, src2, src3, dst0; - __m128i tmp0, tmp1, tmp2, tmp3; - __m128i reg0, reg1; - __m128i const_129 = __lsx_vldi(0x81); - __m128i const_br = {0x4219421942194219, 0x4219421942194219}; - __m128i const_1080 = {0x1080108010801080, 0x1080108010801080}; - - for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, - src0, src1, src2, src3); - tmp0 = __lsx_vpickod_b(src1, src0); - tmp1 = __lsx_vpickev_b(src1, src0); - tmp2 = __lsx_vpickod_b(src3, src2); - tmp3 = __lsx_vpickev_b(src3, src2); - reg0 = 
__lsx_vmaddwod_h_bu(const_1080, tmp1, const_129); - reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129); - reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0); - reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2); - dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8); - __lsx_vst(dst0, dst_y, 0); - dst_y += 16; - src_rgba += 64; - } -} - void RGBAToUVRow_LSX(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_u, @@ -1139,7 +2087,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, __m128i const_38 = __lsx_vldi(0x413); __m128i const_94 = __lsx_vldi(0x42F); __m128i const_18 = __lsx_vldi(0x409); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, @@ -1188,7 +2136,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, __m128i const_21 = __lsx_vldi(0x415); __m128i const_53 = __lsx_vldi(0x435); __m128i const_10 = __lsx_vldi(0x40A); - __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, @@ -1566,7 +2514,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, __m128i const_256 = __lsx_vldi(0x500); __m128i zero = __lsx_vldi(0); __m128i alpha = __lsx_vldi(0xFF); - __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; + __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, @@ -1612,7 +2560,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); __m128i vec_scale = __lsx_vreplgr2vr_w(scale); __m128i zero = __lsx_vldi(0); - __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; + __m128i control = 
(__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000}; for (x = 0; x < len; x++) { DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, @@ -1821,6 +2769,216 @@ void HalfFloatRow_LSX(const uint16_t* src, } } +struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; +}; + +// RGB to JPeg coefficients +// B * 0.1140 coefficient = 29 +// G * 0.5870 coefficient = 150 +// R * 0.2990 coefficient = 77 +// Add 0.5 = 0x80 +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, + 128, + 0}; + +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; + +// RGB to BT.601 coefficients +// B * 0.1016 coefficient = 25 +// G * 0.5078 coefficient = 129 +// R * 0.2578 coefficient = 66 +// Add 16.5 = 0x1080 + +static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + 0x1080, + 0}; + +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; + +// ARGB expects first 3 values to contain RGB and 4th value is ignored. +static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + asm volatile( + "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants + "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants + "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants + "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants + "1: \n\t" + "vld $vr4, %0, 0 \n\t" + "vld $vr5, %0, 16 \n\t" + "vld $vr6, %0, 32 \n\t" + "vld $vr7, %0, 48 \n\t" // load 16 pixels of + // ARGB + "vor.v $vr12, $vr3, $vr3 \n\t" + "vor.v $vr13, $vr3, $vr3 \n\t" + "addi.d %2, %2, -16 \n\t" // 16 processed per + // loop. 
+ "vpickev.b $vr8, $vr5, $vr4 \n\t" // BR + "vpickev.b $vr10, $vr7, $vr6 \n\t" + "vpickod.b $vr9, $vr5, $vr4 \n\t" // GA + "vpickod.b $vr11, $vr7, $vr6 \n\t" + "vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" // B + "vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t" + "vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" // G + "vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t" + "vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" // R + "vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t" + "addi.d %0, %0, 64 \n\t" + "vpickod.b $vr10, $vr13, $vr12 \n\t" + "vst $vr10, %1, 0 \n\t" + "addi.d %1, %1, 16 \n\t" + "bnez %2, 1b \n\t" + : "+&r"(src_argb), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(rgbconstants) + : "memory"); +} + +void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) { + ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants); +} + +void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants); +} + +void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants); +} + +void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants); +} + +// RGBA expects first value to be A and ignored, then 3 values to contain RGB. 
+// Same code as ARGB, except the LD4 +static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + asm volatile( + "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants + "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants + "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants + "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants + "1: \n\t" + "vld $vr4, %0, 0 \n\t" + "vld $vr5, %0, 16 \n\t" + "vld $vr6, %0, 32 \n\t" + "vld $vr7, %0, 48 \n\t" // load 16 pixels of + // RGBA + "vor.v $vr12, $vr3, $vr3 \n\t" + "vor.v $vr13, $vr3, $vr3 \n\t" + "addi.d %2, %2, -16 \n\t" // 16 processed per + // loop. + "vpickev.b $vr8, $vr5, $vr4 \n\t" // AG + "vpickev.b $vr10, $vr7, $vr6 \n\t" + "vpickod.b $vr9, $vr5, $vr4 \n\t" // BR + "vpickod.b $vr11, $vr7, $vr6 \n\t" + "vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" // B + "vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t" + "vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" // G + "vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t" + "vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" // R + "vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t" + "addi.d %0, %0, 64 \n\t" + "vpickod.b $vr10, $vr13, $vr12 \n\t" + "vst $vr10, %1, 0 \n\t" + "addi.d %1, %1, 16 \n\t" + "bnez %2, 1b \n\t" + : "+&r"(src_rgba), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(rgbconstants) + : "memory"); +} + +void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants); +} + +void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { + RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants); +} + +void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants); +} + +static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + int8_t shuff[64] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, + 20, 
21, 23, 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, + 7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10, + 0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0, + 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0}; + asm volatile( + "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants + "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants + "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants + "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants + "vld $vr4, %4, 0 \n\t" // load shuff + "vld $vr5, %4, 16 \n\t" + "vld $vr6, %4, 32 \n\t" + "vld $vr7, %4, 48 \n\t" + "1: \n\t" + "vld $vr8, %0, 0 \n\t" + "vld $vr9, %0, 16 \n\t" + "vld $vr10, %0, 32 \n\t" // load 16 pixels of + // RGB + "vor.v $vr12, $vr3, $vr3 \n\t" + "vor.v $vr13, $vr3, $vr3 \n\t" + "addi.d %2, %2, -16 \n\t" // 16 processed per + // loop. + "vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t" + "vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t" + "vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t" + "vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t" + "vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t" // G + "vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t" + "vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t" // B + "vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t" + "vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t" // R + "vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t" + "addi.d %0, %0, 48 \n\t" + "vpickod.b $vr10, $vr13, $vr12 \n\t" + "vst $vr10, %1, 0 \n\t" + "addi.d %1, %1, 16 \n\t" + "bnez %2, 1b \n\t" + : "+&r"(src_rgba), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(rgbconstants), // %3 + "r"(shuff) // %4 + : "memory"); +} + +void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { + RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants); +} + +void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) { + RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants); +} + +void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants); +} + +void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) 
{ + RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants); +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/source/row_msa.cc b/source/row_msa.cc index b7d5bb5e..b7d5bb5e 100644 --- a/files/source/row_msa.cc +++ b/source/row_msa.cc diff --git a/files/source/row_neon.cc b/source/row_neon.cc index 804ff839..31142a90 100644 --- a/files/source/row_neon.cc +++ b/source/row_neon.cc @@ -89,12 +89,14 @@ extern "C" { "vsli.u16 d2, d2, #8 \n" \ "vsri.u16 d3, d3, #8 \n" +// TODO: Use single register for kUVCoeff and multiply by lane #define YUVTORGB_SETUP \ + "vld1.16 {d31}, [%[kRGBCoeffBias]] \n" \ "vld4.8 {d26[], d27[], d28[], d29[]}, [%[kUVCoeff]] \n" \ - "vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \ - "vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \ - "vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \ - "vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n" + "vdup.u16 q10, d31[1] \n" \ + "vdup.u16 q11, d31[2] \n" \ + "vdup.u16 q12, d31[3] \n" \ + "vdup.u16 d31, d31[0] \n" // q0: B uint16x8_t // q1: G uint16x8_t @@ -156,6 +158,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, : "cc", "memory", YUVTORGB_REGS, "d6"); } +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV444 YUVTORGB + RGBTORGB8 + "subs %[width], %[width], #8 \n" + "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! 
\n" + "bgt 1b \n" + : [src_y] "+r"(src_y), // %[src_y] + [src_u] "+r"(src_u), // %[src_u] + [src_v] "+r"(src_v), // %[src_v] + [dst_rgb24] "+r"(dst_rgb24), // %[dst_argb] + [width] "+r"(width) // %[width] + : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] + [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] + : "cc", "memory", YUVTORGB_REGS); +} + void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -586,10 +611,10 @@ void DetileRow_NEON(const uint8_t* src, int width) { asm volatile( "1: \n" - "vld1.16 {q0}, [%0], %3 \n" // load 16 bytes + "vld1.8 {q0}, [%0], %3 \n" // load 16 bytes "subs %2, %2, #16 \n" // 16 processed per loop - "pld [%0, 1792] \n" - "vst1.16 {q0}, [%1]! \n" // store 16 bytes + "pld [%0, #1792] \n" + "vst1.8 {q0}, [%1]! \n" // store 16 bytes "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 @@ -599,6 +624,26 @@ void DetileRow_NEON(const uint8_t* src, ); } +// Reads 16 byte Y's of 16 bits from tile and writes out 16 Y's. +void DetileRow_16_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels + "subs %2, %2, #16 \n" // 16 processed per loop + "pld [%0, #3584] \n" + "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride * 2) // %3 + : "cc", "memory", "q0", "q1" // Clobber List + ); +} + // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V. void DetileSplitUVRow_NEON(const uint8_t* src_uv, ptrdiff_t src_tile_stride, @@ -609,7 +654,7 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, "1: \n" "vld2.8 {d0, d1}, [%0], %4 \n" "subs %3, %3, #16 \n" - "pld [%0, 1792] \n" + "pld [%0, #1792] \n" "vst1.8 {d0}, [%1]! \n" "vst1.8 {d1}, [%2]! 
\n" "bgt 1b \n" @@ -622,6 +667,101 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, ); } +#if LIBYUV_USE_ST2 +// Read 16 Y, 8 UV, and write 8 YUYV. +void DetileToYUY2_NEON(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "vld1.8 {q0}, [%0], %4 \n" // Load 16 Y + "pld [%0, #1792] \n" + "vld1.8 {q1}, [%1], %5 \n" // Load 8 UV + "pld [%1, #1792] \n" + "subs %3, %3, #16 \n" + "vst2.8 {q0, q1}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 + : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber list + ); +} +#else +// Read 16 Y, 8 UV, and write 8 YUYV. +void DetileToYUY2_NEON(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "vld1.8 {q0}, [%0], %4 \n" // Load 16 Y + "vld1.8 {q1}, [%1], %5 \n" // Load 8 UV + "subs %3, %3, #16 \n" + "pld [%0, #1792] \n" + "vzip.8 q0, q1 \n" + "pld [%1, #1792] \n" + "vst1.8 {q0, q1}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 + : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber list + ); +} +#endif + +void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) { + asm volatile( + "1: \n" + "vld1.8 {q14}, [%0]! \n" // Load lower bits. + "vld1.8 {q9}, [%0]! \n" // Load upper bits row + // by row. + "vld1.8 {q11}, [%0]! \n" + "vld1.8 {q13}, [%0]! \n" + "vld1.8 {q15}, [%0]! \n" + "vshl.u8 q8, q14, #6 \n" // Shift lower bit data + // appropriately. + "vshl.u8 q10, q14, #4 \n" + "vshl.u8 q12, q14, #2 \n" + "vzip.u8 q8, q9 \n" // Interleave upper and + // lower bits. 
+ "vzip.u8 q10, q11 \n" + "vzip.u8 q12, q13 \n" + "vzip.u8 q14, q15 \n" + "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits + // into lower 6 bits for + // better accuracy in + // conversions. + "vsri.u16 q9, q9, #10 \n" + "vsri.u16 q10, q10, #10 \n" + "vsri.u16 q11, q11, #10 \n" + "vsri.u16 q12, q12, #10 \n" + "vsri.u16 q13, q13, #10 \n" + "vsri.u16 q14, q14, #10 \n" + "vsri.u16 q15, q15, #10 \n" + "vstmia %1!, {q8-q15} \n" // Store pixel block (64 + // pixels). + "subs %2, %2, #80 \n" + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(size) // %2 + : + : "cc", "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); +} + // Reads 16 U's and V's and writes out 16 pairs of UV. void MergeUVRow_NEON(const uint8_t* src_u, const uint8_t* src_v, @@ -664,7 +804,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb, "+r"(dst_b), // %3 "+r"(width) // %4 : // Input registers - : "cc", "memory", "d0", "d1", "d2" // Clobber List + : "cc", "memory", "q0", "q1", "q2" // Clobber List ); } @@ -1505,6 +1645,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy, ); } +void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width) { + asm volatile( + "add %1, %0, %1 \n" // stride + src_yuy2 + "1: \n" + "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. + "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. + "vld2.8 {q2, q3}, [%1]! \n" // load next row YUY2. + "vrhadd.u8 q4, q1, q3 \n" // average rows of UV + "vst1.8 {q4}, [%2]! \n" // store 8 UV. + "bgt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(stride_yuy2), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", + "d7" // Clobber List + ); +} + // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, @@ -1590,7 +1753,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb, void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "vdup.32 d7, %2 \n" // dither4 @@ -1664,19 +1827,27 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, ); } +struct RgbUVConstants { + uint8_t kRGBToU[4]; + uint8_t kRGBToV[4]; +}; + // 8x1 pixels. -void ARGBToUV444Row_NEON(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - asm volatile( - "vmov.u8 d24, #112 \n" // UB / VR 0.875 - // coefficient - "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient - "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient - "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient - "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient +void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct RgbUVConstants* rgbuvconstants) { + asm volatile( + + "vld1.8 {d0}, [%4] \n" // load rgbuvconstants + "vdup.u8 d24, d0[0] \n" // UB 0.875 coefficient + "vdup.u8 d25, d0[1] \n" // UG -0.5781 coefficient + "vdup.u8 d26, d0[2] \n" // UR -0.2969 coefficient + "vdup.u8 d27, d0[4] \n" // VB -0.1406 coefficient + "vdup.u8 d28, d0[5] \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %3, %3, #8 \n" // 8 processed per loop. @@ -1694,15 +1865,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb, "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
"bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"(rgbuvconstants) // %4 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"); } +// RGB to bt601 coefficients +// UB 0.875 coefficient = 112 +// UG -0.5781 coefficient = 74 +// UR -0.2969 coefficient = 38 +// VB -0.1406 coefficient = 18 +// VG -0.7344 coefficient = 94 +// VR 0.875 coefficient = 112 (ignored) + +static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0}, + {18, 94, 112, 0}}; + +// RGB to JPeg coefficients +// UB coeff 0.500 = 127 +// UG coeff -0.33126 = 84 +// UR coeff -0.16874 = 43 +// VB coeff -0.08131 = 20 +// VG coeff -0.41869 = 107 +// VR coeff 0.500 = 127 (ignored) + +static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0}, + {20, 107, 127, 0}}; + +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24I601UVConstants); +} + +void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24JPegUVConstants); +} + // clang-format off // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. #define RGBTOUV(QB, QG, QR) \ @@ -1762,7 +1971,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb, ); } -// TODO(fbarchard): Subsample match C code. +// TODO(fbarchard): Subsample match Intel code. 
void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -1808,6 +2017,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb, ); } +void ABGRToUVJRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_uj, + uint8_t* dst_vj, + int width) { + asm volatile ( + "add %1, %0, %1 \n" // src_stride + src_argb + "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient + "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient + "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient + "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient + "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient + "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. + "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. + "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. + "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. + "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. + "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. + + "vrshr.u16 q0, q0, #1 \n" // 2x average + "vrshr.u16 q1, q1, #1 \n" + "vrshr.u16 q2, q2, #1 \n" + + "subs %4, %4, #16 \n" // 16 processed per loop. + RGBTOUV(q2, q1, q0) + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. + "bgt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(src_stride_abgr), // %1 + "+r"(dst_uj), // %2 + "+r"(dst_vj), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); +} + // TODO(fbarchard): Subsample match C code. 
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, @@ -2494,7 +2748,6 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64, struct RgbConstants { uint8_t kRGBToY[4]; uint16_t kAddY; - uint16_t pad; }; // RGB to JPeg coefficients @@ -2502,11 +2755,9 @@ struct RgbConstants { // G * 0.5870 coefficient = 150 // R * 0.2990 coefficient = 77 // Add 0.5 = 0x80 -static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128}; -static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128}; // RGB to BT.601 coefficients // B * 0.1016 coefficient = 25 @@ -2515,12 +2766,9 @@ static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; // Add 16.5 = 0x1080 static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; + 0x1080}; -static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080}; // ARGB expects first 3 values to contain RGB and 4th value is ignored. void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, @@ -2567,6 +2815,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants); } +void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants); +} + // RGBA expects first value to be A and ignored, then 3 values to contain RGB. // Same code as ARGB, except the LD4 void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba, @@ -2846,6 +3098,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( + "vmov.u16 q15, #0x00ff \n" // 255 for rounding up + // Attenuate 8 pixels. 
"1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. @@ -2853,16 +3107,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, "vmull.u8 q10, d0, d3 \n" // b * a "vmull.u8 q11, d1, d3 \n" // g * a "vmull.u8 q12, d2, d3 \n" // r * a - "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 - "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 - "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 + "vaddhn.u16 d0, q10, q15 \n" // (b + 255) >> 8 + "vaddhn.u16 d1, q11, q15 \n" // (g + 255) >> 8 + "vaddhn.u16 d2, q12, q15 \n" // (r + 255) >> 8 "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : - : "cc", "memory", "q0", "q1", "q10", "q11", "q12"); + : "cc", "memory", "q0", "q1", "q10", "q11", "q12", "q15"); } // Quantize 8 ARGB pixels (32 bytes). @@ -3633,7 +3887,7 @@ void SplitUVRow_16_NEON(const uint16_t* src_uv, "+r"(dst_v), // %2 "+r"(width) // %3 : "r"(shift) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q4"); + : "cc", "memory", "q0", "q1", "q2"); } void MergeUVRow_16_NEON(const uint16_t* src_u, @@ -3687,31 +3941,25 @@ void DivideRow_16_NEON(const uint16_t* src_y, int scale, int width) { asm volatile( - "vdup.16 q0, %3 \n" - "1: \n" - "vld1.16 {q1}, [%0]! \n" - "vld1.16 {q2}, [%0]! \n" - "vmovl.u16 q3, d2 \n" - "vmovl.u16 q1, d3 \n" - "vmovl.u16 q4, d4 \n" - "vmovl.u16 q2, d5 \n" - "vshl.u32 q3, q3, q0 \n" - "vshl.u32 q4, q4, q0 \n" - "vshl.u32 q1, q1, q0 \n" - "vshl.u32 q2, q2, q0 \n" - "vmovn.u32 d2, q3 \n" - "vmovn.u32 d3, q1 \n" - "vmovn.u32 d4, q4 \n" - "vmovn.u32 d5, q2 \n" - "vst1.16 {q1}, [%1]! \n" - "vst1.16 {q2}, [%1]! \n" + "vdup.16 d8, %3 \n" + "1: \n" + "vld1.16 {q2, q3}, [%0]! \n" + "vmull.u16 q0, d4, d8 \n" + "vmull.u16 q1, d5, d8 \n" + "vmull.u16 q2, d6, d8 \n" + "vmull.u16 q3, d7, d8 \n" + "vshrn.u32 d0, q0, #16 \n" + "vshrn.u32 d1, q1, #16 \n" + "vshrn.u32 d2, q2, #16 \n" + "vshrn.u32 d3, q3, #16 \n" + "vst1.16 {q0, q1}, [%1]! 
\n" // store 16 pixels "subs %2, %2, #16 \n" // 16 src pixels per loop "bgt 1b \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 - : "cc", "memory", "q0", "q1", "q2", "q3", "q4"); + : "cc", "memory", "q0", "q1", "q2", "q3", "d8"); } // Use scale to convert lsb formats to msb, depending how many bits there are: diff --git a/files/source/row_neon64.cc b/source/row_neon64.cc index 0f120373..1679f87c 100644 --- a/files/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -142,6 +142,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, : "cc", "memory", YUVTORGB_REGS, "v19"); } +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV444 YUVTORGB + RGBTORGB8 + "subs %w[width], %w[width], #8 \n" + "st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n" + "b.gt 1b \n" + : [src_y] "+r"(src_y), // %[src_y] + [src_u] "+r"(src_u), // %[src_u] + [src_v] "+r"(src_v), // %[src_v] + [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24] + [width] "+r"(width) // %[width] + : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] + [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] + : "cc", "memory", YUVTORGB_REGS); +} + void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -627,6 +650,26 @@ void DetileRow_NEON(const uint8_t* src, ); } +// Reads 16 byte Y's of 16 bits from tile and writes out 16 Y's. 
+void DetileRow_16_NEON(const uint16_t* src, + ptrdiff_t src_tile_stride, + uint16_t* dst, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.8h,v1.8h}, [%0], %3 \n" // load 16 pixels + "subs %w2, %w2, #16 \n" // 16 processed per loop + "prfm pldl1keep, [%0, 3584] \n" // 7 tiles of 512b ahead + "st1 {v0.8h,v1.8h}, [%1], #32 \n" // store 16 pixels + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"(src_tile_stride * 2) // %3 + : "cc", "memory", "v0", "v1" // Clobber List + ); +} + // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V. void DetileSplitUVRow_NEON(const uint8_t* src_uv, ptrdiff_t src_tile_stride, @@ -651,6 +694,100 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv, } #if LIBYUV_USE_ST2 +// Read 16 Y, 8 UV, and write 8 YUY2 +void DetileToYUY2_NEON(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys + "prfm pldl1keep, [%0, 1792] \n" + "ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs + "prfm pldl1keep, [%1, 1792] \n" + "subs %w3, %w3, #16 \n" // store 8 YUY2 + "st2 {v0.16b,v1.16b}, [%2], #32 \n" + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 + : "cc", "memory", "v0", "v1" // Clobber list + ); +} +#else +// Read 16 Y, 8 UV, and write 8 YUY2 +void DetileToYUY2_NEON(const uint8_t* src_y, + ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, + ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys + "ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs + "subs %w3, %w3, #16 \n" + "prfm pldl1keep, [%0, 1792] \n" + "zip1 v2.16b, v0.16b, v1.16b \n" + "prfm pldl1keep, [%1, 1792] \n" + "zip2 v3.16b, v0.16b, v1.16b \n" + "st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 
YUY2 + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_yuy2), // %2 + "+r"(width) // %3 + : "r"(src_y_tile_stride), // %4 + "r"(src_uv_tile_stride) // %5 + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list + ); +} +#endif + +// Unpack MT2T into tiled P010 64 pixels at a time. See +// tinyurl.com/mtk-10bit-video-format for format documentation. +void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) { + asm volatile( + "1: \n" + "ld1 {v7.16b}, [%0], #16 \n" + "ld1 {v0.16b-v3.16b}, [%0], #64 \n" + "shl v4.16b, v7.16b, #6 \n" + "shl v5.16b, v7.16b, #4 \n" + "shl v6.16b, v7.16b, #2 \n" + "subs %2, %2, #80 \n" + "zip1 v16.16b, v4.16b, v0.16b \n" + "zip1 v18.16b, v5.16b, v1.16b \n" + "zip1 v20.16b, v6.16b, v2.16b \n" + "zip1 v22.16b, v7.16b, v3.16b \n" + "zip2 v17.16b, v4.16b, v0.16b \n" + "zip2 v19.16b, v5.16b, v1.16b \n" + "zip2 v21.16b, v6.16b, v2.16b \n" + "zip2 v23.16b, v7.16b, v3.16b \n" + "sri v16.8h, v16.8h, #10 \n" + "sri v17.8h, v17.8h, #10 \n" + "sri v18.8h, v18.8h, #10 \n" + "sri v19.8h, v19.8h, #10 \n" + "st1 {v16.8h-v19.8h}, [%1], #64 \n" + "sri v20.8h, v20.8h, #10 \n" + "sri v21.8h, v21.8h, #10 \n" + "sri v22.8h, v22.8h, #10 \n" + "sri v23.8h, v23.8h, #10 \n" + "st1 {v20.8h-v23.8h}, [%1], #64 \n" + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(size) // %2 + : + : "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); +} + +#if LIBYUV_USE_ST2 // Reads 16 U's and V's and writes out 16 pairs of UV. void MergeUVRow_NEON(const uint8_t* src_u, const uint8_t* src_v, @@ -1729,6 +1866,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy, ); } +void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_uv, + int width) { + const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2; + asm volatile( + "1: \n" + "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels + "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. 
+ "ld2 {v2.16b,v3.16b}, [%1], #32 \n" // load next row + "urhadd v4.16b, v1.16b, v3.16b \n" // average rows of UV + "prfm pldl1keep, [%0, 448] \n" + "st1 {v4.16b}, [%2], #16 \n" // store 8 UV. + "b.gt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(src_yuy2b), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List + ); +} + // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, @@ -1819,24 +1979,23 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb, void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( - "dup v1.4s, %w2 \n" // dither4 + "dup v1.4s, %w3 \n" // dither4 "1: \n" - "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // load 8 - // pixels - "subs %w3, %w3, #8 \n" // 8 processed per loop. + "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. "uqadd v16.8b, v16.8b, v1.8b \n" "prfm pldl1keep, [%0, 448] \n" "uqadd v17.8b, v17.8b, v1.8b \n" "uqadd v18.8b, v18.8b, v1.8b \n" ARGBTORGB565 - "st1 {v18.16b}, [%0], #16 \n" // store 8 pixels RGB565. + "st1 {v18.16b}, [%1], #16 \n" // store 8 pixels RGB565. "b.gt 1b \n" - : "+r"(dst_rgb) // %0 - : "r"(src_argb), // %1 - "r"(dither4), // %2 - "r"(width) // %3 + : "+r"(src_argb), // %0 + "+r"(dst_rgb), // %1 + "+r"(width) // %2 + : "r"(dither4) // %3 : "cc", "memory", "v1", "v16", "v17", "v18", "v19"); } @@ -2039,19 +2198,26 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, ); } +struct RgbUVConstants { + uint8_t kRGBToU[4]; + uint8_t kRGBToV[4]; +}; + // 8x1 pixels. 
-void ARGBToUV444Row_NEON(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { +void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct RgbUVConstants* rgbuvconstants) { asm volatile( - "movi v24.8b, #112 \n" // UB / VR 0.875 - // coefficient - "movi v25.8b, #74 \n" // UG -0.5781 coefficient - "movi v26.8b, #38 \n" // UR -0.2969 coefficient - "movi v27.8b, #18 \n" // VB -0.1406 coefficient - "movi v28.8b, #94 \n" // VG -0.7344 coefficient - "movi v29.16b,#0x80 \n" // 128.5 + "ldr d0, [%4] \n" // load rgbuvconstants + "dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient + "dup v25.16b, v0.b[1] \n" // UG -0.5781 coefficient + "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient + "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient + "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient + "movi v29.16b, #0x80 \n" // 128.5 + "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w3, %w3, #8 \n" // 8 processed per loop. @@ -2070,15 +2236,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb, "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 
"b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"(rgbuvconstants) // %4 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26", "v27", "v28", "v29"); } +// RGB to bt601 coefficients +// UB 0.875 coefficient = 112 +// UG -0.5781 coefficient = 74 +// UR -0.2969 coefficient = 38 +// VB -0.1406 coefficient = 18 +// VG -0.7344 coefficient = 94 +// VR 0.875 coefficient = 112 (ignored) + +static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0}, + {18, 94, 112, 0}}; + +// RGB to JPeg coefficients +// UB coeff 0.500 = 127 +// UG coeff -0.33126 = 84 +// UR coeff -0.16874 = 43 +// VB coeff -0.08131 = 20 +// VG coeff -0.41869 = 107 +// VR coeff 0.500 = 127 (ignored) + +static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0}, + {20, 107, 127, 0}}; + +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24I601UVConstants); +} + +void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kRgb24JPegUVConstants); +} + #define RGBTOUV_SETUP_REG \ "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ @@ -2144,6 +2348,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb, ); } +// TODO(fbarchard): Subsample match Intel code. 
void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -2189,6 +2394,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb, ); } +void ABGRToUVJRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_uj, + uint8_t* dst_vj, + int width) { + const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; + asm volatile ( + "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2 + "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2 + "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2 + "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2 + "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2 + "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit) + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. + "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts. + "prfm pldl1keep, [%0, 448] \n" + "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 + "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts. + "prfm pldl1keep, [%1, 448] \n" + "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts. + + "urshr v0.8h, v0.8h, #1 \n" // 2x average + "urshr v1.8h, v1.8h, #1 \n" + "urshr v2.8h, v2.8h, #1 \n" + + "subs %w4, %w4, #16 \n" // 16 processed per loop. + RGBTOUV(v2.8h, v1.8h, v0.8h) + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
+ "b.gt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(src_abgr_1), // %1 + "+r"(dst_uj), // %2 + "+r"(dst_vj), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v20", "v21", "v22", "v23", "v24", "v25" + ); +} + void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, @@ -2738,34 +2988,8 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, struct RgbConstants { uint8_t kRGBToY[4]; uint16_t kAddY; - uint16_t pad; }; -// RGB to JPeg coefficients -// B * 0.1140 coefficient = 29 -// G * 0.5870 coefficient = 150 -// R * 0.2990 coefficient = 77 -// Add 0.5 = 0x80 -static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; - -static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; - -// RGB to BT.601 coefficients -// B * 0.1016 coefficient = 25 -// G * 0.5078 coefficient = 129 -// R * 0.2578 coefficient = 66 -// Add 16.5 = 0x1080 - -static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; - -static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; - // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, @@ -2800,6 +3024,26 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, "v17"); } +// RGB to JPeg coefficients +// B * 0.1140 coefficient = 29 +// G * 0.5870 coefficient = 150 +// R * 0.2990 coefficient = 77 +// Add 0.5 = 0x80 +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128}; + +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128}; + +// RGB to BT.601 coefficients +// B * 0.1016 coefficient = 25 +// G * 0.5078 coefficient = 129 +// R * 0.2578 coefficient = 66 +// Add 16.5 = 0x1080 + +static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + 0x1080}; + +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080}; + void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants); } @@ -2812,6 +3056,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants); } +void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants); +} + // RGBA expects first value to be A and ignored, then 3 values to contain RGB. // Same code as ARGB, except the LD4 void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba, @@ -3193,6 +3441,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( + "movi v7.8h, #0x00ff \n" // 255 for rounding up + // Attenuate 8 pixels. 
"1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB @@ -3201,16 +3451,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, "prfm pldl1keep, [%0, 448] \n" "umull v5.8h, v1.8b, v3.8b \n" // g * a "umull v6.8h, v2.8b, v3.8b \n" // r * a - "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8 - "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8 - "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8 + "addhn v0.8b, v4.8h, v7.8h \n" // (b + 255) >> 8 + "addhn v1.8b, v5.8h, v7.8h \n" // (g + 255) >> 8 + "addhn v2.8b, v6.8h, v7.8h \n" // (r + 255) >> 8 "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Quantize 8 ARGB pixels (32 bytes). @@ -3751,6 +4001,86 @@ void ByteToFloatRow_NEON(const uint8_t* src, : "cc", "memory", "v1", "v2", "v3"); } +// Convert FP16 Half Floats to FP32 Floats +void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16 + float* dst, + int width) { + asm volatile( + "1: \n" + "ld1 {v1.8h}, [%0], #16 \n" // load 8 halffloats + "subs %w2, %w2, #8 \n" // 8 floats per loop + "prfm pldl1keep, [%0, 448] \n" + "fcvtl v2.4s, v1.4h \n" // 8 floats + "fcvtl2 v3.4s, v1.8h \n" + "stp q2, q3, [%1], #32 \n" // store 8 floats + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3"); +} + +// Convert FP16 Half Floats to FP32 Floats +// Read a column and write a row +void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16 + int src_stride, // stride in elements + float* dst, + int width) { + asm volatile( + "cmp %w2, #8 \n" // Is there 8 rows? 
+ "b.lo 2f \n" + "1: \n" + "ld1 {v0.h}[0], [%0], %3 \n" // load 8 halffloats + "ld1 {v0.h}[1], [%0], %3 \n" + "ld1 {v0.h}[2], [%0], %3 \n" + "ld1 {v0.h}[3], [%0], %3 \n" + "ld1 {v1.h}[0], [%0], %3 \n" + "ld1 {v1.h}[1], [%0], %3 \n" + "ld1 {v1.h}[2], [%0], %3 \n" + "ld1 {v1.h}[3], [%0], %3 \n" + "subs %w2, %w2, #8 \n" // 8 rows per loop + "prfm pldl1keep, [%0, 448] \n" + "fcvtl v2.4s, v0.4h \n" // 4 floats + "fcvtl v3.4s, v1.4h \n" // 4 more floats + "stp q2, q3, [%1], #32 \n" // store 8 floats + "b.gt 1b \n" + "cmp %w2, #1 \n" // Is there 1 value? + "b.lo 3f \n" + "2: \n" + "ld1 {v1.h}[0], [%0], %3 \n" // load 1 halffloats + "subs %w2, %w2, #1 \n" // 1 floats per loop + "fcvtl v2.4s, v1.4h \n" // 1 floats + "str s2, [%1], #4 \n" // store 1 floats + "b.gt 2b \n" + "3: \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((ptrdiff_t)(src_stride * 2)) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3"); +} + +// Convert FP32 Floats to FP16 Half Floats +void ConvertFP32ToFP16Row_NEON(const float* src, + uint16_t* dst, // fp16 + int width) { + asm volatile( + "1: \n" + "ldp q2, q3, [%0], #32 \n" // load 8 floats + "subs %w2, %w2, #8 \n" // 8 floats per loop + "prfm pldl1keep, [%0, 448] \n" + "fcvtn v1.4h, v2.4s \n" // 8 fp16 halffloats + "fcvtn2 v1.8h, v3.4s \n" + "str q1, [%1], #16 \n" // store 8 fp16 halffloats + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3"); +} + float ScaleMaxSamples_NEON(const float* src, float* dst, float scale, @@ -4241,23 +4571,19 @@ void DivideRow_16_NEON(const uint16_t* src_y, int scale, int width) { asm volatile( - "dup v0.8h, %w3 \n" + "dup v4.8h, %w3 \n" "1: \n" - "ldp q1, q2, [%0], #32 \n" - "ushll v3.4s, v1.4h, #0 \n" - "ushll v4.4s, v2.4h, #0 \n" + "ldp q2, q3, [%0], #32 \n" + "umull v0.4s, v2.4h, v4.4h \n" + "umull2 v1.4s, v2.8h, v4.8h \n" + "umull v2.4s, v3.4h, v4.4h \n" + "umull2 v3.4s, v3.8h, v4.8h \n" "prfm pldl1keep, [%0, 448] \n" - "ushll2 v1.4s, 
v1.8h, #0 \n" - "ushll2 v2.4s, v2.8h, #0 \n" - "mul v3.4s, v0.4s, v3.4s \n" - "mul v4.4s, v0.4s, v4.4s \n" - "mul v1.4s, v0.4s, v1.4s \n" - "mul v2.4s, v0.4s, v2.4s \n" - "shrn v3.4h, v3.4s, #16 \n" - "shrn v4.4h, v4.4s, #16 \n" - "shrn2 v3.8h, v1.4s, #16 \n" - "shrn2 v4.8h, v2.4s, #16 \n" - "stp q3, q3, [%1], #32 \n" // store 16 pixels + "shrn v0.4h, v0.4s, #16 \n" + "shrn2 v0.8h, v1.4s, #16 \n" + "shrn v1.4h, v2.4s, #16 \n" + "shrn2 v1.8h, v3.4s, #16 \n" + "stp q0, q1, [%1], #32 \n" // store 16 pixels "subs %w2, %w2, #16 \n" // 16 src pixels per loop "b.gt 1b \n" : "+r"(src_y), // %0 diff --git a/source/row_rvv.cc b/source/row_rvv.cc new file mode 100644 index 00000000..0bf2bef6 --- /dev/null +++ b/source/row_rvv.cc @@ -0,0 +1,1394 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Copyright (c) 2023 SiFive, Inc. All rights reserved. + * + * Contributed by Darren Hsieh <darren.hsieh@sifive.com> + * Contributed by Bruce Lai <bruce.lai@sifive.com> + */ + +#include "libyuv/row.h" + +// This module is for clang rvv. GCC hasn't supported segment load & store. +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \ + defined(__clang__) +#include <assert.h> +#include <riscv_vector.h> + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// Fill YUV -> RGB conversion constants into vectors +// NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode +// register) is set to round-to-nearest-up mode(0). 
+#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \ + { \ + asm volatile("csrwi vxrm, 0"); \ + ub = yuvconst->kUVCoeff[0]; \ + vr = yuvconst->kUVCoeff[1]; \ + ug = yuvconst->kUVCoeff[2]; \ + vg = yuvconst->kUVCoeff[3]; \ + yg = yuvconst->kRGBCoeffBias[0]; \ + bb = yuvconst->kRGBCoeffBias[1] + 32; \ + bg = yuvconst->kRGBCoeffBias[2] - 32; \ + br = yuvconst->kRGBCoeffBias[3] + 32; \ + } + +// Read [2*VLEN/8] Y, [VLEN/8] U and [VLEN/8] V from 422 +#define READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_tmp0, v_tmp1; \ + vuint8m2_t v_y; \ + vuint16m2_t v_u_16, v_v_16; \ + vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ + v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \ + v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ + v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \ + v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ + v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ + v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ + v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ + v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ + } + +// Read [2*VLEN/8] Y, [2*VLEN/8] U, and [2*VLEN/8] V from 444 +#define READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \ + { \ + vuint8m2_t v_y; \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_u = __riscv_vle8_v_u8m2(src_u, vl); \ + v_v = __riscv_vle8_v_u8m2(src_v, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ + } + +// Convert from YUV to fixed point RGB +#define YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, \ + v_b_16, v_r_16) \ + { \ + vuint16m4_t v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4; \ + vuint32m8_t v_tmp5; \ + v_tmp0 = __riscv_vwmulu_vx_u16m4(v_u, ug, vl); \ + v_y_16 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); \ + v_tmp0 = __riscv_vwmaccu_vx_u16m4(v_tmp0, vg, v_v, vl); \ + v_tmp1 = 
__riscv_vwmulu_vx_u16m4(v_u, ub, vl); \ + v_tmp5 = __riscv_vwmulu_vx_u32m8(v_y_16, yg, vl); \ + v_tmp2 = __riscv_vnsrl_wx_u16m4(v_tmp5, 16, vl); \ + v_tmp3 = __riscv_vadd_vx_u16m4(v_tmp2, bg, vl); \ + v_tmp4 = __riscv_vadd_vv_u16m4(v_tmp2, v_tmp1, vl); \ + v_tmp2 = __riscv_vwmaccu_vx_u16m4(v_tmp2, vr, v_v, vl); \ + v_g_16 = __riscv_vssubu_vv_u16m4(v_tmp3, v_tmp0, vl); \ + v_b_16 = __riscv_vssubu_vx_u16m4(v_tmp4, bb, vl); \ + v_r_16 = __riscv_vssubu_vx_u16m4(v_tmp2, br, vl); \ + } + +// Convert from fixed point RGB To 8 bit RGB +#define RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r) \ + { \ + v_g = __riscv_vnclipu_wx_u8m2(v_g_16, 6, vl); \ + v_b = __riscv_vnclipu_wx_u8m2(v_b_16, 6, vl); \ + v_r = __riscv_vnclipu_wx_u8m2(v_r_16, 6, vl); \ + } + +// Read [2*VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_uv +#define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_tmp0, v_tmp1; \ + vuint8m2_t v_y; \ + vuint16m2_t v_u_16, v_v_16; \ + vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ + __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_uv, vl); \ + v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ + v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ + v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ + v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ + v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ + v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ + } + +// Read 2*[VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_vu +#define READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16) \ + { \ + vuint8m1_t v_tmp0, v_tmp1; \ + vuint8m2_t v_y; \ + vuint16m2_t v_u_16, v_v_16; \ + vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ + __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_vu, vl); \ + v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ + v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ + v_v_16 = 
__riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ + v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ + v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ + v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ + vl = __riscv_vsetvl_e8m2(w); \ + v_y = __riscv_vle8_v_u8m2(src_y, vl); \ + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ + } + +#ifdef HAS_ARGBTOAR64ROW_RVV +void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { + size_t avl = (size_t)4 * width; + do { + vuint16m8_t v_ar64; + vuint8m4_t v_argb; + size_t vl = __riscv_vsetvl_e8m4(avl); + v_argb = __riscv_vle8_v_u8m4(src_argb, vl); + v_ar64 = __riscv_vwaddu_vx_u16m8(v_argb, 0, vl); + v_ar64 = __riscv_vmul_vx_u16m8(v_ar64, 0x0101, vl); + __riscv_vse16_v_u16m8(dst_ar64, v_ar64, vl); + avl -= vl; + src_argb += vl; + dst_ar64 += vl; + } while (avl > 0); +} +#endif + +#ifdef HAS_ARGBTOAB64ROW_RVV +void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { + size_t avl = (size_t)width; + do { + vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16; + vuint8m1_t v_b, v_g, v_r, v_a; + size_t vl = __riscv_vsetvl_e8m1(avl); + __riscv_vlseg4e8_v_u8m1(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + v_b_16 = __riscv_vwaddu_vx_u16m2(v_b, 0, vl); + v_g_16 = __riscv_vwaddu_vx_u16m2(v_g, 0, vl); + v_r_16 = __riscv_vwaddu_vx_u16m2(v_r, 0, vl); + v_a_16 = __riscv_vwaddu_vx_u16m2(v_a, 0, vl); + v_b_16 = __riscv_vmul_vx_u16m2(v_b_16, 0x0101, vl); + v_g_16 = __riscv_vmul_vx_u16m2(v_g_16, 0x0101, vl); + v_r_16 = __riscv_vmul_vx_u16m2(v_r_16, 0x0101, vl); + v_a_16 = __riscv_vmul_vx_u16m2(v_a_16, 0x0101, vl); + __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r_16, v_g_16, v_b_16, v_a_16, vl); + avl -= vl; + src_argb += 4 * vl; + dst_ab64 += 4 * vl; + } while (avl > 0); +} +#endif + +#ifdef HAS_AR64TOARGBROW_RVV +void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { + size_t avl = (size_t)4 * width; + do { + vuint16m8_t v_ar64; + vuint8m4_t v_argb; + size_t vl = __riscv_vsetvl_e16m8(avl); 
+ v_ar64 = __riscv_vle16_v_u16m8(src_ar64, vl); + v_argb = __riscv_vnsrl_wx_u8m4(v_ar64, 8, vl); + __riscv_vse8_v_u8m4(dst_argb, v_argb, vl); + avl -= vl; + src_ar64 += vl; + dst_argb += vl; + } while (avl > 0); +} +#endif + +#ifdef HAS_AR64TOAB64ROW_RVV +void AR64ToAB64Row_RVV(const uint16_t* src_ar64, + uint16_t* dst_ab64, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e16m2(w); + vuint16m2_t v_b, v_g, v_r, v_a; + __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl); + __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl); + w -= vl; + src_ar64 += vl * 4; + dst_ab64 += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_AB64TOARGBROW_RVV +void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { + size_t avl = (size_t)width; + do { + vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16; + vuint8m1_t v_b, v_g, v_r, v_a; + size_t vl = __riscv_vsetvl_e16m2(avl); + __riscv_vlseg4e16_v_u16m2(&v_r_16, &v_g_16, &v_b_16, &v_a_16, src_ab64, vl); + v_b = __riscv_vnsrl_wx_u8m1(v_b_16, 8, vl); + v_g = __riscv_vnsrl_wx_u8m1(v_g_16, 8, vl); + v_r = __riscv_vnsrl_wx_u8m1(v_r_16, 8, vl); + v_a = __riscv_vnsrl_wx_u8m1(v_a_16, 8, vl); + __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl); + avl -= vl; + src_ab64 += 4 * vl; + dst_argb += 4 * vl; + } while (avl > 0); +} +#endif + +#ifdef HAS_RAWTOARGBROW_RVV +void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + vuint8m2_t v_b, v_g, v_r; + __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_raw += vl * 3; + dst_argb += vl * 4; + vl = __riscv_vsetvl_e8m2(w); + } while (w > 0); +} +#endif + +#ifdef HAS_RAWTORGBAROW_RVV +void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { + size_t w = (size_t)width; + size_t vl = 
__riscv_vsetvl_e8m2(w); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + vuint8m2_t v_b, v_g, v_r; + __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl); + __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); + w -= vl; + src_raw += vl * 3; + dst_rgba += vl * 4; + vl = __riscv_vsetvl_e8m2(w); + } while (w > 0); +} +#endif + +#ifdef HAS_RAWTORGB24ROW_RVV +void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_r, v_g, v_b, vl); + w -= vl; + src_raw += vl * 3; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTORAWROW_RVV +void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r, v_a; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg3e8_v_u8m2(dst_raw, v_r, v_g, v_b, vl); + w -= vl; + src_argb += vl * 4; + dst_raw += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTORGB24ROW_RVV +void ARGBToRGB24Row_RVV(const uint8_t* src_argb, + uint8_t* dst_rgb24, + int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r, v_a; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_argb += vl * 4; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTOABGRROW_RVV +void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl); + w -= vl; + src_argb += vl * 4; + 
dst_abgr += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTOBGRAROW_RVV +void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl); + w -= vl; + src_argb += vl * 4; + dst_bgra += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTORGBAROW_RVV +void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); + w -= vl; + src_argb += vl * 4; + dst_rgba += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_RGBATOARGBROW_RVV +void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a, v_r, v_g, v_b; + __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_rgba += vl * 4; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_RGB24TOARGBROW_RVV +void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + vuint8m2_t v_b, v_g, v_r; + __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb24, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_rgb24 += vl * 3; + dst_argb += vl * 4; + vl = __riscv_vsetvl_e8m2(w); + } while (w > 0); +} +#endif + +#ifdef HAS_I444TOARGBROW_RVV +void I444ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + 
const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_u += vl; + src_v += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_I444ALPHATOARGBROW_RVV +void I444AlphaToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t vl; + size_t w = (size_t)width; + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); + v_a = __riscv_vle8_v_u8m2(src_a, vl); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_a += vl; + src_u += vl; + src_v += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_I444TORGB24ROW_RVV +void I444ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t vl; + size_t w = (size_t)width; + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, 
br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_u += vl; + src_v += vl; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_I422TOARGBROW_RVV +void I422ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_u += vl / 2; + src_v += vl / 2; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_I422ALPHATOARGBROW_RVV +void I422AlphaToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t vl; + size_t w = (size_t)width; + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READYUV422(vl, w, src_y, 
src_u, src_v, v_u, v_v, v_y_16); + v_a = __riscv_vle8_v_u8m2(src_a, vl); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_a += vl; + src_u += vl / 2; + src_v += vl / 2; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_I422TORGBAROW_RVV +void I422ToRGBARow_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_u += vl / 2; + src_v += vl / 2; + dst_rgba += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_I422TORGB24ROW_RVV +void I422ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t vl; + size_t w = (size_t)width; + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, 
v_b, v_r); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_u += vl / 2; + src_v += vl / 2; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_I400TOARGBROW_RVV +void I400ToARGBRow_RVV(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + const bool is_yb_positive = (yuvconstants->kRGBCoeffBias[4] >= 0); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + vuint16m4_t v_yb; + vuint16m4_t v_yg = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[0], vl); + // To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) sets to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + if (is_yb_positive) { + v_yb = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[4] - 32, vl); + } else { + v_yb = __riscv_vmv_v_x_u16m4(-yuvconstants->kRGBCoeffBias[4] + 32, vl); + } + do { + vuint8m2_t v_y, v_out; + vuint16m4_t v_y_16, v_tmp0, v_tmp1, v_tmp2; + vl = __riscv_vsetvl_e8m2(w); + v_y = __riscv_vle8_v_u8m2(src_y, vl); + v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); + v_tmp0 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); // 257 * v_y + v_tmp1 = __riscv_vmulhu_vv_u16m4(v_tmp0, v_yg, vl); + if (is_yb_positive) { + v_tmp2 = __riscv_vsaddu_vv_u16m4(v_tmp1, v_yb, vl); + } else { + v_tmp2 = __riscv_vssubu_vv_u16m4(v_tmp1, v_yb, vl); + } + v_out = __riscv_vnclipu_wx_u8m2(v_tmp2, 6, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_out, v_out, v_out, v_a, vl); + w -= vl; + src_y += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_J400TOARGBROW_RVV +void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + vuint8m2_t v_y; + v_y = __riscv_vle8_v_u8m2(src_y, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_y, v_y, v_y, v_a, vl); + w -= vl; + 
src_y += vl; + dst_argb += vl * 4; + vl = __riscv_vsetvl_e8m2(w); + } while (w > 0); +} +#endif + +#ifdef HAS_COPYROW_RVV +void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m8(w); + vuint8m8_t v_data = __riscv_vle8_v_u8m8(src, vl); + __riscv_vse8_v_u8m8(dst, v_data, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_NV12TOARGBROW_RVV +void NV12ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_uv += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_NV12TORGB24ROW_RVV +void NV12ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, 
v_g, v_r, vl); + w -= vl; + src_y += vl; + src_uv += vl; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_NV21TOARGBROW_RVV +void NV21ToARGBRow_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_y += vl; + src_vu += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_NV21TORGB24ROW_RVV +void NV21ToRGB24Row_RVV(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + uint8_t ub, vr, ug, vg; + int16_t yg, bb, bg, br; + vuint8m2_t v_u, v_v; + vuint8m2_t v_b, v_g, v_r; + vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; + YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); + do { + READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16); + YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, + v_b_16, v_r_16); + RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); + w -= vl; + src_y += vl; + src_vu += vl; + dst_rgb24 += vl * 3; + } while (w > 0); +} +#endif + +// Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1 + +#ifdef HAS_INTERPOLATEROW_RVV +void InterpolateRow_RVV(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int 
source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint8_t* src_ptr1 = src_ptr + src_stride; + size_t dst_w = (size_t)dst_width; + assert(source_y_fraction >= 0); + assert(source_y_fraction < 256); + // Blend 100 / 0 - Copy row unchanged. + if (y1_fraction == 0) { + do { + size_t vl = __riscv_vsetvl_e8m8(dst_w); + __riscv_vse8_v_u8m8(dst_ptr, __riscv_vle8_v_u8m8(src_ptr, vl), vl); + dst_w -= vl; + src_ptr += vl; + dst_ptr += vl; + } while (dst_w > 0); + return; + } + // To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up(0). + asm volatile("csrwi vxrm, 0"); + // Blend 50 / 50. + if (y1_fraction == 128) { + do { + size_t vl = __riscv_vsetvl_e8m8(dst_w); + vuint8m8_t row0 = __riscv_vle8_v_u8m8(src_ptr, vl); + vuint8m8_t row1 = __riscv_vle8_v_u8m8(src_ptr1, vl); + // Use round-to-nearest-up mode for averaging add + vuint8m8_t row_out = __riscv_vaaddu_vv_u8m8(row0, row1, vl); + __riscv_vse8_v_u8m8(dst_ptr, row_out, vl); + dst_w -= vl; + src_ptr += vl; + src_ptr1 += vl; + dst_ptr += vl; + } while (dst_w > 0); + return; + } + // General purpose row blend. 
+ do { + size_t vl = __riscv_vsetvl_e8m4(dst_w); + vuint8m4_t row0 = __riscv_vle8_v_u8m4(src_ptr, vl); + vuint16m8_t acc = __riscv_vwmulu_vx_u16m8(row0, y0_fraction, vl); + vuint8m4_t row1 = __riscv_vle8_v_u8m4(src_ptr1, vl); + acc = __riscv_vwmaccu_vx_u16m8(acc, y1_fraction, row1, vl); + // Use round-to-nearest-up mode for vnclip + __riscv_vse8_v_u8m4(dst_ptr, __riscv_vnclipu_wx_u8m4(acc, 8, vl), vl); + dst_w -= vl; + src_ptr += vl; + src_ptr1 += vl; + dst_ptr += vl; + } while (dst_w > 0); +} +#endif + +#ifdef HAS_SPLITRGBROW_RVV +void SplitRGBRow_RVV(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_rgb, vl); + __riscv_vse8_v_u8m2(dst_r, v_r, vl); + __riscv_vse8_v_u8m2(dst_g, v_g, vl); + __riscv_vse8_v_u8m2(dst_b, v_b, vl); + w -= vl; + dst_r += vl; + dst_g += vl; + dst_b += vl; + src_rgb += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_MERGERGBROW_RVV +void MergeRGBRow_RVV(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl); + vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl); + vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl); + __riscv_vsseg3e8_v_u8m2(dst_rgb, v_r, v_g, v_b, vl); + w -= vl; + src_r += vl; + src_g += vl; + src_b += vl; + dst_rgb += vl * 3; + } while (w > 0); +} +#endif + +#ifdef HAS_SPLITARGBROW_RVV +void SplitARGBRow_RVV(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r, v_a; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vse8_v_u8m2(dst_a, v_a, vl); + __riscv_vse8_v_u8m2(dst_r, v_r, vl); + 
__riscv_vse8_v_u8m2(dst_g, v_g, vl); + __riscv_vse8_v_u8m2(dst_b, v_b, vl); + w -= vl; + dst_a += vl; + dst_r += vl; + dst_g += vl; + dst_b += vl; + src_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_MERGEARGBROW_RVV +void MergeARGBRow_RVV(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl); + vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl); + vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl); + vuint8m2_t v_a = __riscv_vle8_v_u8m2(src_a, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_r += vl; + src_g += vl; + src_b += vl; + src_a += vl; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SPLITXRGBROW_RVV +void SplitXRGBRow_RVV(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r, v_a; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vse8_v_u8m2(dst_r, v_r, vl); + __riscv_vse8_v_u8m2(dst_g, v_g, vl); + __riscv_vse8_v_u8m2(dst_b, v_b, vl); + w -= vl; + dst_r += vl; + dst_g += vl; + dst_b += vl; + src_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_MERGEXRGBROW_RVV +void MergeXRGBRow_RVV(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + do { + vuint8m2_t v_r, v_g, v_b; + v_r = __riscv_vle8_v_u8m2(src_r, vl); + v_g = __riscv_vle8_v_u8m2(src_g, vl); + v_b = __riscv_vle8_v_u8m2(src_b, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_r += vl; + src_g += vl; + src_b += vl; + dst_argb += vl * 4; + vl = __riscv_vsetvl_e8m2(w); + } while 
(w > 0); +} +#endif + +#ifdef HAS_SPLITUVROW_RVV +void SplitUVRow_RVV(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_u, v_v; + __riscv_vlseg2e8_v_u8m4(&v_u, &v_v, src_uv, vl); + __riscv_vse8_v_u8m4(dst_u, v_u, vl); + __riscv_vse8_v_u8m4(dst_v, v_v, vl); + w -= vl; + dst_u += vl; + dst_v += vl; + src_uv += 2 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_MERGEUVROW_RVV +void MergeUVRow_RVV(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width) { + size_t w = (size_t)width; + do { + vuint8m4_t v_u, v_v; + size_t vl = __riscv_vsetvl_e8m4(w); + v_u = __riscv_vle8_v_u8m4(src_u, vl); + v_v = __riscv_vle8_v_u8m4(src_v, vl); + __riscv_vsseg2e8_v_u8m4(dst_uv, v_u, v_v, vl); + w -= vl; + src_u += vl; + src_v += vl; + dst_uv += 2 * vl; + } while (w > 0); +} +#endif + +struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; +}; + +// RGB to JPeg coefficients +// B * 0.1140 coefficient = 29 +// G * 0.5870 coefficient = 150 +// R * 0.2990 coefficient = 77 +// Add 0.5 = 0x80 +static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, + 128, + 0}; + +static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; + +// RGB to BT.601 coefficients +// B * 0.1016 coefficient = 25 +// G * 0.5078 coefficient = 129 +// R * 0.2578 coefficient = 66 +// Add 16.5 = 0x1080 + +static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + 0x1080, + 0}; + +static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; + +// ARGB expects first 3 values to contain RGB and 4th value is ignored +#ifdef HAS_ARGBTOYMATRIXROW_RVV +void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + assert(width != 0); + size_t w = (size_t)width; + vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY 
constant + vuint16m4_t v_addy; // vector is to store kAddY + size_t vl = __riscv_vsetvl_e8m2(w); + v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); + v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); + v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); + v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); + do { + vuint8m2_t v_b, v_g, v_r, v_a, v_y; + vuint16m4_t v_y_u16; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl); + v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl); + v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl); + v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl); + v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl); + __riscv_vse8_v_u8m2(dst_y, v_y, vl); + w -= vl; + src_argb += 4 * vl; + dst_y += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBTOYROW_RVV +void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) { + ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants); +} +#endif + +#ifdef HAS_ARGBTOYJROW_RVV +void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants); +} +#endif + +#ifdef HAS_ABGRTOYROW_RVV +void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants); +} +#endif + +#ifdef HAS_ABGRTOYJROW_RVV +void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants); +} +#endif + +// RGBA expects first value to be A and ignored, then 3 values to contain RGB. 
+#ifdef HAS_RGBATOYMATRIXROW_RVV +void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + assert(width != 0); + size_t w = (size_t)width; + vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant + vuint16m4_t v_addy; // vector is to store kAddY + size_t vl = __riscv_vsetvl_e8m2(w); + v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); + v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); + v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); + v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); + do { + vuint8m2_t v_b, v_g, v_r, v_a, v_y; + vuint16m4_t v_y_u16; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl); + v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl); + v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl); + v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl); + v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl); + v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl); + __riscv_vse8_v_u8m2(dst_y, v_y, vl); + w -= vl; + src_rgba += 4 * vl; + dst_y += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_RGBATOYROW_RVV +void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants); +} +#endif + +#ifdef HAS_RGBATOYJROW_RVV +void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { + RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants); +} +#endif + +#ifdef HAS_BGRATOYROW_RVV +void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants); +} +#endif + +#ifdef HAS_RGBTOYMATRIXROW_RVV +void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, + uint8_t* dst_y, + int width, + const struct RgbConstants* rgbconstants) { + assert(width != 0); + size_t w = (size_t)width; + vuint8m2_t v_by, v_gy, v_ry; // vectors are to 
store RGBToY constant + vuint16m4_t v_addy; // vector is to store kAddY + size_t vl = __riscv_vsetvl_e8m2(w); + v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); + v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); + v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); + v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); + do { + vuint8m2_t v_b, v_g, v_r, v_y; + vuint16m4_t v_y_u16; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb, vl); + v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl); + v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl); + v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl); + v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl); + v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl); + __riscv_vse8_v_u8m2(dst_y, v_y, vl); + w -= vl; + src_rgb += 3 * vl; + dst_y += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_RGB24TOYJROW_RVV +void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { + RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants); +} +#endif + +#ifdef HAS_RAWTOYJROW_RVV +void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) { + RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants); +} +#endif + +#ifdef HAS_RGB24TOYROW_RVV +void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { + RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants); +} +#endif + +#ifdef HAS_RAWTOYROW_RVV +void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) { + RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants); +} +#endif + +// Blend src_argb over src_argb1 and store to dst_argb. +// dst_argb may be src_argb or src_argb1. +// src_argb: RGB values have already been pre-multiplied by the a. 
+#ifdef HAS_ARGBBLENDROW_RVV +void ARGBBlendRow_RVV(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + size_t w = (size_t)width; + size_t vl = __riscv_vsetvlmax_e8m2(); + // clamp255((((256 - a) * b) >> 8) + f) + // = b * (256 - a) / 256 + f + // = b - (b * a / 256) + f + vuint8m2_t v_255 = __riscv_vmv_v_x_u8m2(255, vl); + do { + vuint8m2_t v_src0_b, v_src0_g, v_src0_r, v_src0_a; + vuint8m2_t v_src1_b, v_src1_g, v_src1_r, v_src1_a; + vuint8m2_t v_tmp_b, v_tmp_g, v_tmp_r; + vuint8m2_t v_dst_b, v_dst_g, v_dst_r; + vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_src0_b, &v_src0_g, &v_src0_r, &v_src0_a, + src_argb, vl); + __riscv_vlseg4e8_v_u8m2(&v_src1_b, &v_src1_g, &v_src1_r, &v_src1_a, + src_argb1, vl); + + v_tmp_b = __riscv_vmulhu_vv_u8m2(v_src1_b, v_src0_a, vl); + v_tmp_g = __riscv_vmulhu_vv_u8m2(v_src1_g, v_src0_a, vl); + v_tmp_r = __riscv_vmulhu_vv_u8m2(v_src1_r, v_src0_a, vl); + + v_dst_b = __riscv_vsub_vv_u8m2(v_src1_b, v_tmp_b, vl); + v_dst_g = __riscv_vsub_vv_u8m2(v_src1_g, v_tmp_g, vl); + v_dst_r = __riscv_vsub_vv_u8m2(v_src1_r, v_tmp_r, vl); + + v_dst_b = __riscv_vsaddu_vv_u8m2(v_dst_b, v_src0_b, vl); + v_dst_g = __riscv_vsaddu_vv_u8m2(v_dst_g, v_src0_g, vl); + v_dst_r = __riscv_vsaddu_vv_u8m2(v_dst_r, v_src0_r, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_dst_b, v_dst_g, v_dst_r, v_255, vl); + + w -= vl; + src_argb += 4 * vl; + src_argb1 += 4 * vl; + dst_argb += 4 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_BLENDPLANEROW_RVV +void BlendPlaneRow_RVV(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { + size_t w = (size_t)width; + do { + vuint16m8_t v_dst_u16; + vuint8m4_t v_dst; + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl); + vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl); + vuint8m4_t v_alpha = __riscv_vle8_v_u8m4(alpha, vl); + vuint8m4_t v_255_minus_alpha = __riscv_vrsub_vx_u8m4(v_alpha, 255u, vl); + + // (a 
* foreground) + (1-a) * background + v_dst_u16 = __riscv_vwmulu_vv_u16m8(v_alpha, v_src0, vl); + v_dst_u16 = + __riscv_vwmaccu_vv_u16m8(v_dst_u16, v_255_minus_alpha, v_src1, vl); + v_dst_u16 = __riscv_vadd_vx_u16m8(v_dst_u16, 255u, vl); + v_dst = __riscv_vnsrl_wx_u8m4(v_dst_u16, 8, vl); + + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + src0 += vl; + src1 += vl; + alpha += vl; + dst += vl; + } while (w > 0); +} +#endif + +// Attenuate: (f * a + 255) >> 8 +#ifdef HAS_ARGBATTENUATEROW_RVV +void ARGBAttenuateRow_RVV(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + size_t w = (size_t)width; + do { + vuint8m2_t v_b, v_g, v_r, v_a; + vuint16m4_t v_ba_16, v_ga_16, v_ra_16; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + // f * a + v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl); + v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl); + v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl); + // f * a + 255 + v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl); + v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl); + v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl); + // (f * a + 255) >> 8 + v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl); + v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl); + v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + w -= vl; + src_argb += vl * 4; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBEXTRACTALPHAROW_RVV +void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + size_t w = (size_t)width; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_b, v_g, v_r, v_a; + __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + __riscv_vse8_v_u8m2(dst_a, v_a, vl); + w -= vl; + src_argb += vl * 4; + dst_a += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_ARGBCOPYYTOALPHAROW_RVV +void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) { + size_t w = 
(size_t)width; + const ptrdiff_t dst_stride = 4; + dst += 3; + do { + size_t vl = __riscv_vsetvl_e8m8(w); + vuint8m8_t v_a = __riscv_vle8_v_u8m8(src, vl); + __riscv_vsse8_v_u8m8(dst, dst_stride, v_a, vl); + w -= vl; + src += vl; + dst += vl * dst_stride; + } while (w > 0); +} +#endif + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && + // defined(__clang__) diff --git a/files/source/row_win.cc b/source/row_win.cc index c7c1ff60..5fb28521 100644 --- a/files/source/row_win.cc +++ b/source/row_win.cc @@ -14,7 +14,9 @@ #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) -#if defined(_M_X64) +#if defined(_M_ARM64EC) +#include <intrin.h> +#elif defined(_M_X64) #include <emmintrin.h> #include <tmmintrin.h> // For _mm_maddubs_epi16 #endif @@ -893,7 +895,7 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { __asm { @@ -940,7 +942,7 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 __declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { __asm { mov eax, [esp + 4] // src_argb @@ -2789,6 +2791,44 @@ __declspec(naked) void I422ToRGB24Row_SSSE3( } } +// 8 pixels. +// 8 UV values, mixed with 8 Y producing 8 RGB24 (24 bytes). 
+__declspec(naked) void I444ToRGB24Row_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + __asm { + push esi + push edi + push ebx + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U + mov edi, [esp + 12 + 12] // V + mov edx, [esp + 12 + 16] // argb + mov ebx, [esp + 12 + 20] // yuvconstants + mov ecx, [esp + 12 + 24] // width + sub edi, esi + movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 + movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 + + convertloop: + READYUV444 + YUVTORGB(ebx) + STORERGB24 + + sub ecx, 8 + jg convertloop + + pop ebx + pop edi + pop esi + ret + } +} + // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). __declspec(naked) void I422ToRGB565Row_SSSE3( @@ -3423,17 +3463,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u, sub edx, eax convertloop: - vmovdqu ymm0, [eax] // read 32 U's - vmovdqu ymm1, [eax + edx] // and 32 V's - lea eax, [eax + 32] - vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 - vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3 - vextractf128 [edi], ymm2, 0 // bytes 0..15 - vextractf128 [edi + 16], ymm0, 0 // bytes 16..31 - vextractf128 [edi + 32], ymm2, 1 // bytes 32..47 - vextractf128 [edi + 48], ymm0, 1 // bytes 47..63 - lea edi, [edi + 64] - sub ecx, 32 + vpmovzxbw ymm0, [eax] + vpmovzxbw ymm1, [eax + edx] + lea eax, [eax + 16] + vpsllw ymm1, ymm1, 8 + vpor ymm2, ymm1, ymm0 + vmovdqu [edi], ymm2 + lea edi, [edi + 32] + sub ecx, 16 jg convertloop pop edi diff --git a/files/source/scale.cc b/source/scale.cc index e1335f1e..b7a602ba 100644 --- a/files/source/scale.cc +++ b/source/scale.cc @@ -135,6 +135,14 @@ static void ScalePlaneDown2(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowDown2 = filtering == kFilterNone + ? 
ScaleRowDown2_RVV + : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV + : ScaleRowDown2Box_RVV); + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -198,6 +206,51 @@ static void ScalePlaneDown2_16(int src_width, } } +void ScalePlaneDown2_16To8(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint8_t* dst_ptr, + int scale, + enum FilterMode filtering) { + int y; + void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width, int scale) = + (src_width & 1) + ? (filtering == kFilterNone + ? ScaleRowDown2_16To8_Odd_C + : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C + : ScaleRowDown2Box_16To8_Odd_C)) + : (filtering == kFilterNone + ? ScaleRowDown2_16To8_C + : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C + : ScaleRowDown2Box_16To8_C)); + int row_stride = src_stride * 2; + (void)dst_height; + if (!filtering) { + src_ptr += src_stride; // Point to odd rows. + src_stride = 0; + } + + if (filtering == kFilterLinear) { + src_stride = 0; + } + for (y = 0; y < src_height / 2; ++y) { + ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale); + src_ptr += row_stride; + dst_ptr += dst_stride; + } + if (src_height & 1) { + if (!filtering) { + src_ptr -= src_stride; // Point to last row. + } + ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale); + } +} + // Scale plane, 1/4 // This is an optimized version for scaling down a plane to 1/4 of // its original size. @@ -267,6 +320,11 @@ static void ScalePlaneDown4(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN4_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowDown4 = filtering ? 
ScaleRowDown4Box_RVV : ScaleRowDown4_RVV; + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -427,6 +485,17 @@ static void ScalePlaneDown34(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN34_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_RVV; + ScaleRowDown34_1 = ScaleRowDown34_RVV; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV; + } + } +#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -642,6 +711,17 @@ static void ScalePlaneDown38(int src_width, } } #endif +#if defined(HAS_SCALEROWDOWN38_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_RVV; + ScaleRowDown38_2 = ScaleRowDown38_RVV; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV; + } + } +#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); @@ -775,9 +855,11 @@ static void ScaleAddCols2_C(int dst_width, int ix = x >> 16; x += dx; boxwidth = MIN1((x >> 16) - ix); - *dst_ptr++ = - SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >> - 16; + int scaletbl_index = boxwidth - minboxwidth; + assert((scaletbl_index == 0) || (scaletbl_index == 1)); + *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) * + scaletbl[scaletbl_index] >> + 16); } } @@ -797,9 +879,10 @@ static void ScaleAddCols2_16_C(int dst_width, int ix = x >> 16; x += dx; boxwidth = MIN1((x >> 16) - ix); - *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * - scaletbl[boxwidth - minboxwidth] >> - 16; + int scaletbl_index = boxwidth - minboxwidth; + assert((scaletbl_index == 0) || (scaletbl_index == 1)); + *dst_ptr++ = + SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[scaletbl_index] >> 16; } } @@ -814,7 +897,7 @@ static void ScaleAddCols0_C(int dst_width, (void)dx; src_ptr += (x >> 16); for (i 
= 0; i < dst_width; ++i) { - *dst_ptr++ = src_ptr[i] * scaleval >> 16; + *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16); } } @@ -829,7 +912,7 @@ static void ScaleAddCols1_C(int dst_width, int i; x >>= 16; for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; + *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16); x += boxwidth; } } @@ -856,14 +939,14 @@ static void ScaleAddCols1_16_C(int dst_width, // one pixel of destination using fixed point (16.16) to step // through source, sampling a box of pixel with simple // averaging. -static void ScalePlaneBox(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static int ScalePlaneBox(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -877,6 +960,8 @@ static void ScalePlaneBox(int src_width, { // Allocate a row buffer of uint16_t. align_buffer_64(row16, src_width * 2); + if (!row16) + return 1; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) = (dx & 0xffff) ? 
ScaleAddCols2_C @@ -923,6 +1008,11 @@ static void ScalePlaneBox(int src_width, } } #endif +#if defined(HAS_SCALEADDROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleAddRow = ScaleAddRow_RVV; + } +#endif for (j = 0; j < dst_height; ++j) { int boxheight; @@ -943,16 +1033,17 @@ static void ScalePlaneBox(int src_width, } free_aligned_buffer_64(row16); } + return 0; } -static void ScalePlaneBox_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static int ScalePlaneBox_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -966,6 +1057,8 @@ static void ScalePlaneBox_16(int src_width, { // Allocate a row buffer of uint32_t. align_buffer_64(row32, src_width * 4); + if (!row32) + return 1; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint32_t* src_ptr, uint16_t* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; @@ -997,18 +1090,19 @@ static void ScalePlaneBox_16(int src_width, } free_aligned_buffer_64(row32); } + return 0; } // Scale plane down with bilinear interpolation. -void ScalePlaneBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1017,13 +1111,15 @@ void ScalePlaneBilinearDown(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 
// Allocate a row buffer. align_buffer_64(row, src_width); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1070,6 +1166,11 @@ void ScalePlaneBilinearDown(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif #if defined(HAS_SCALEFILTERCOLS_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { @@ -1121,17 +1222,18 @@ void ScalePlaneBilinearDown(int src_width, } } free_aligned_buffer_64(row); + return 0; } -void ScalePlaneBilinearDown_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearDown_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -1140,13 +1242,15 @@ void ScalePlaneBilinearDown_16(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row buffer. 
align_buffer_64(row, src_width * 2); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, + void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; - void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, + void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1212,18 +1316,19 @@ void ScalePlaneBilinearDown_16(int src_width, } } free_aligned_buffer_64(row); + return 0; } // Scale up down with bilinear interpolation. -void ScalePlaneBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, + enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1231,10 +1336,10 @@ void ScalePlaneBilinearUp(int src_width, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) = filtering ? 
ScaleFilterCols_C : ScaleCols_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1265,6 +1370,11 @@ void ScalePlaneBilinearUp(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_C; @@ -1315,11 +1425,13 @@ void ScalePlaneBilinearUp(int src_width, const uint8_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1360,6 +1472,7 @@ void ScalePlaneBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale plane, horizontally up by 2 times. @@ -1367,20 +1480,21 @@ void ScalePlaneBilinearUp(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // This is used to scale U and V planes of I422 to I444. -void ScalePlaneUp2_Linear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static void ScalePlaneUp2_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_Any_C; int i; int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. 
assert(src_width == ((dst_width + 1) / 2)); @@ -1407,6 +1521,11 @@ void ScalePlaneUp2_Linear(int src_width, ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; } #endif +#ifdef HAS_SCALEROWUP2_LINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp = ScaleRowUp2_Linear_RVV; + } +#endif if (dst_height == 1) { ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, @@ -1426,19 +1545,20 @@ void ScalePlaneUp2_Linear(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of I420 to I444. -void ScalePlaneUp2_Bilinear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static void ScalePlaneUp2_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1466,6 +1586,11 @@ void ScalePlaneUp2_Bilinear(int src_width, Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; } #endif +#ifdef HAS_SCALEROWUP2_BILINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp = ScaleRowUp2_Bilinear_RVV; + } +#endif Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); dst_ptr += dst_stride; @@ -1486,20 +1611,21 @@ void ScalePlaneUp2_Bilinear(int src_width, // its original width, using linear interpolation. // stride is in count of uint16_t. // This is used to scale U and V planes of I210 to I410 and I212 to I412. 
-void ScalePlaneUp2_12_Linear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_12_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; int i; int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1540,19 +1666,20 @@ void ScalePlaneUp2_12_Linear(int src_width, // its original size, using bilinear interpolation. // stride is in count of uint16_t. // This is used to scale U and V planes of I010 to I410 and I012 to I412. -void ScalePlaneUp2_12_Bilinear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_12_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. 
assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1587,20 +1714,21 @@ void ScalePlaneUp2_12_Bilinear(int src_width, } } -void ScalePlaneUp2_16_Linear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_16_Linear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; int i; int y; int dy; + (void)src_width; // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); @@ -1636,19 +1764,20 @@ void ScalePlaneUp2_16_Linear(int src_width, } } -void ScalePlaneUp2_16_Bilinear(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScalePlaneUp2_16_Bilinear(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; int x; + (void)src_width; // This function can only scale up by 2 times. 
assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); @@ -1683,15 +1812,15 @@ void ScalePlaneUp2_16_Bilinear(int src_width, } } -void ScalePlaneBilinearUp_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr, - enum FilterMode filtering) { +static int ScalePlaneBilinearUp_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, + enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1699,10 +1828,10 @@ void ScalePlaneBilinearUp_16(int src_width, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, + void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; - void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, + void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1766,12 +1895,13 @@ void ScalePlaneBilinearUp_16(int src_width, const uint16_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. 
- const int kRowSize = (dst_width + 31) & ~31; - align_buffer_64(row, kRowSize * 4); - - uint16_t* rowptr = (uint16_t*)row; - int rowstride = kRowSize; + const int row_size = (dst_width + 31) & ~31; + align_buffer_64(row, row_size * 4); + int rowstride = row_size; int lasty = yi; + uint16_t* rowptr = (uint16_t*)row; + if (!row) + return 1; ScaleFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { @@ -1811,6 +1941,7 @@ void ScalePlaneBilinearUp_16(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale Plane to/from any dimensions, without interpolation. @@ -1827,7 +1958,7 @@ static void ScalePlaneSimple(int src_width, const uint8_t* src_ptr, uint8_t* dst_ptr) { int i; - void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, + void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) = ScaleCols_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1864,7 +1995,7 @@ static void ScalePlaneSimple_16(int src_width, const uint16_t* src_ptr, uint16_t* dst_ptr) { int i; - void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, + void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = ScaleCols_16_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1895,15 +2026,15 @@ static void ScalePlaneSimple_16(int src_width, // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. LIBYUV_API -void ScalePlane(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. 
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -1919,7 +2050,7 @@ void ScalePlane(const uint8_t* src, if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; + return 0; } if (dst_width == src_width && filtering != kFilterBox) { int dy = 0; @@ -1935,7 +2066,7 @@ void ScalePlane(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); - return; + return 0; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. @@ -1943,69 +2074,67 @@ void ScalePlane(const uint8_t* src, // optimized, 3/4 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } // 3/8 rounded up for odd sized chroma height. 
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst); - return; + return ScalePlaneBox(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); } if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } if (filtering) { - ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); + return 0; } LIBYUV_API -void ScalePlane_16(const uint16_t* src, - int src_stride, - 
int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2021,7 +2150,7 @@ void ScalePlane_16(const uint16_t* src, if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; + return 0; } if (dst_width == src_width && filtering != kFilterBox) { int dy = 0; @@ -2040,7 +2169,7 @@ void ScalePlane_16(const uint16_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); - return; + return 0; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. @@ -2048,69 +2177,68 @@ void ScalePlane_16(const uint16_t* src, // optimized, 3/4 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } // 3/8 rounded up for odd sized chroma height. 
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); - return; + return 0; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst); - return; + return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst); } if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); } if (filtering) { - ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; + return ScalePlaneBilinearDown_16(src_width, src_height, dst_width, + dst_height, src_stride, dst_stride, src, + dst, filtering); } ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); + return 0; } LIBYUV_API -void ScalePlane_12(const 
uint16_t* src, - int src_stride, - int src_width, - int src_height, - uint16_t* dst, - int dst_stride, - int dst_width, - int dst_height, - enum FilterMode filtering) { +int ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2125,17 +2253,17 @@ void ScalePlane_12(const uint16_t* src, if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) { ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); - return; + return 0; } - ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, - dst_width, dst_height, filtering); + return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride, + dst_width, dst_height, filtering); } // Scale an I420 image. 
@@ -2163,6 +2291,7 @@ int I420Scale(const uint8_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2170,13 +2299,19 @@ int I420Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } LIBYUV_API @@ -2201,6 +2336,7 @@ int I420Scale_16(const uint16_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2208,13 +2344,19 @@ int I420Scale_16(const uint16_t* src_y, return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - 
ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } LIBYUV_API @@ -2239,6 +2381,7 @@ int I420Scale_12(const uint16_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2246,13 +2389,19 @@ int I420Scale_12(const uint16_t* src_y, return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, - dst_stride_u, dst_halfwidth, dst_halfheight, filtering); - ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, - dst_stride_v, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); + return r; } // Scale an I444 image. 
@@ -2276,19 +2425,27 @@ int I444Scale(const uint8_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } LIBYUV_API @@ -2309,19 +2466,27 @@ int I444Scale_16(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = 
ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } LIBYUV_API @@ -2342,19 +2507,27 @@ int I444Scale_12(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int r; + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u, - dst_width, dst_height, filtering); - ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v, - dst_width, dst_height, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, + dst_stride_u, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, + dst_stride_v, dst_width, dst_height, filtering); + return r; } // Scale an I422 image. 
@@ -2380,6 +2553,7 @@ int I422Scale(const uint8_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2387,13 +2561,19 @@ int I422Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } LIBYUV_API @@ -2416,6 +2596,7 @@ int I422Scale_16(const uint16_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2423,13 +2604,19 @@ int I422Scale_16(const uint16_t* src_y, return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, 
dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } LIBYUV_API @@ -2452,6 +2639,7 @@ int I422Scale_12(const uint16_t* src_y, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); + int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || @@ -2459,13 +2647,19 @@ int I422Scale_12(const uint16_t* src_y, return -1; } - ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, - dst_stride_u, dst_halfwidth, dst_height, filtering); - ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, - dst_stride_v, dst_halfwidth, dst_height, filtering); - return 0; + r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u, + dst_stride_u, dst_halfwidth, dst_height, filtering); + if (r != 0) { + return r; + } + r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v, + dst_stride_v, dst_halfwidth, dst_height, filtering); + return r; } // Scale an NV12 image. 
@@ -2489,6 +2683,7 @@ int NV12Scale(const uint8_t* src_y, int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || @@ -2496,11 +2691,14 @@ int NV12Scale(const uint8_t* src_y, return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, - dst_width, dst_height, filtering); - UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, - dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); - return 0; + r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, + dst_stride_y, dst_width, dst_height, filtering); + if (r != 0) { + return r; + } + r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv, + dst_stride_uv, dst_halfwidth, dst_halfheight, filtering); + return r; } // Deprecated api diff --git a/files/source/scale_any.cc b/source/scale_any.cc index 317041f8..f6576874 100644 --- a/files/source/scale_any.cc +++ b/source/scale_any.cc @@ -128,6 +128,22 @@ SDODD(ScaleRowDown2Box_Odd_NEON, 1, 15) #endif +#ifdef HAS_SCALEUVROWDOWN2_NEON +SDANY(ScaleUVRowDown2_Any_NEON, + ScaleUVRowDown2_NEON, + ScaleUVRowDown2_C, + 2, + 2, + 7) +#endif +#ifdef HAS_SCALEUVROWDOWN2LINEAR_NEON +SDANY(ScaleUVRowDown2Linear_Any_NEON, + ScaleUVRowDown2Linear_NEON, + ScaleUVRowDown2Linear_C, + 2, + 2, + 7) +#endif #ifdef HAS_SCALEUVROWDOWN2BOX_NEON SDANY(ScaleUVRowDown2Box_Any_NEON, ScaleUVRowDown2Box_NEON, diff --git a/files/source/scale_argb.cc b/source/scale_argb.cc index 9c3acf7f..18bdeb86 100644 --- a/files/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -16,6 +16,7 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" // For CopyARGB #include "libyuv/row.h" +#include "libyuv/scale_argb.h" #include "libyuv/scale_row.h" #ifdef __cplusplus @@ -58,9 +59,9 @@ static void 
ScaleARGBDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; } else { - src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4; } #if defined(HAS_SCALEARGBROWDOWN2_SSE2) @@ -127,6 +128,15 @@ static void ScaleARGBDown2(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_RVV + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_RVV + : ScaleARGBRowDown2Box_RVV); + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -141,28 +151,33 @@ static void ScaleARGBDown2(int src_width, // ScaleARGB ARGB, 1/4 // This is an optimized version for scaling down a ARGB to 1/4 of // its original size. -static void ScaleARGBDown4Box(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy) { +static int ScaleARGBDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 * 4 + 31) & ~31; + // TODO(fbarchard): Remove this row buffer and implement a ScaleARGBRowDown4 + // but implemented via a 2 pass wrapper that uses a very small array on the + // stack with a horizontal loop. 
+ align_buffer_64(row, row_size * 2); + if (!row) + return 1; int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. - src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; (void)src_width; (void)src_height; (void)dx; @@ -184,16 +199,22 @@ static void ScaleARGBDown4Box(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_RVV; + } +#endif for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); - ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, + ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size, dst_width * 2); - ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); + ScaleARGBRowDown2(row, row_size, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } free_aligned_buffer_64(row); + return 0; } // ScaleARGB ARGB Even @@ -214,7 +235,7 @@ static void ScaleARGBDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - int row_stride = (dy >> 16) * (int64_t)src_stride; + ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, int src_step, uint8_t* dst_argb, int dst_width) = filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; @@ -222,7 +243,7 @@ static void ScaleARGBDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = filtering ? 
ScaleARGBRowDownEvenBox_Any_SSE2 @@ -263,6 +284,16 @@ static void ScaleARGBDownEven(int src_width, } } #endif +#if defined(HAS_SCALEARGBROWDOWNEVENBOX_RVV) + if (filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEvenBox_RVV; + } +#endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV) + if (!filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV; + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -275,24 +306,24 @@ static void ScaleARGBDownEven(int src_width, } // Scale ARGB down with bilinear interpolation. -static void ScaleARGBBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleARGBBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? 
ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; int64_t xlast = x + (int64_t)(dst_width - 1) * dx; @@ -348,6 +379,11 @@ static void ScaleARGBBilinearDown(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; @@ -381,6 +417,8 @@ static void ScaleARGBBilinearDown(int src_width, // Allocate a row of ARGB. { align_buffer_64(row, clip_src_width * 4); + if (!row) + return 1; const int max_y = (src_height - 1) << 16; if (y > max_y) { @@ -388,7 +426,7 @@ static void ScaleARGBBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_argb + yi * (int64_t)src_stride; + const uint8_t* src = src_argb + yi * (intptr_t)src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -404,27 +442,28 @@ static void ScaleARGBBilinearDown(int src_width, } free_aligned_buffer_64(row); } + return 0; } // Scale ARGB up with bilinear interpolation. 
-static void ScaleARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_argb, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; const int max_y = (src_height - 1) << 16; @@ -468,6 +507,11 @@ static void ScaleARGBBilinearUp(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif if (src_width >= 32768) { ScaleARGBFilterCols = filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C; @@ -545,14 +589,16 @@ static void ScaleARGBBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_argb + yi * (int64_t)src_stride; + const uint8_t* src = src_argb + yi * (intptr_t)src_stride; // Allocate 2 rows of ARGB. 
- const int kRowSize = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -570,7 +616,7 @@ static void ScaleARGBBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_argb + yi * (int64_t)src_stride; + src = src_argb + yi * (intptr_t)src_stride; } if (yi != lasty) { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -593,27 +639,28 @@ static void ScaleARGBBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } #ifdef YUVSCALEUP // Scale YUV to ARGB up with bilinear interpolation. -static void ScaleYUVToARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleYUVToARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int dst_stride_argb, + const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, int width) = @@ -659,6 +706,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif +#if defined(HAS_I422TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGBRow = I422ToARGBRow_Any_LSX; + if (IS_ALIGNED(src_width, 16)) { + I422ToARGBRow = I422ToARGBRow_LSX; + } + } +#endif #if defined(HAS_I422TOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToARGBRow 
= I422ToARGBRow_Any_LASX; @@ -667,8 +722,13 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif - void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; #if defined(HAS_INTERPOLATEROW_SSSE3) @@ -711,8 +771,13 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif - void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; if (src_width >= 32768) { @@ -793,20 +858,21 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. int yi = y >> 16; int uv_yi = yi >> kYShift; - const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y; - const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u; - const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v; + const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y; + const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; + const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; - // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); - - // Allocate 1 row of ARGB for source conversion. - align_buffer_64(argb_row, src_width * 4); + // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB + // scaled horizontally to the destination width. 
+ const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2 + src_width * 4); + uint8_t* argb_row = row + row_size * 2; uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; + if (!row) + return 1; // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); @@ -833,9 +899,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, y = max_y; yi = y >> 16; uv_yi = yi >> kYShift; - src_row_y = src_y + yi * (int64_t)src_stride_y; - src_row_u = src_u + uv_yi * (int64_t)src_stride_u; - src_row_v = src_v + uv_yi * (int64_t)src_stride_v; + src_row_y = src_y + yi * (intptr_t)src_stride_y; + src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; + src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; } if (yi != lasty) { // TODO(fbarchard): Convert the clipped region of row. @@ -861,7 +927,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, y += dy; } free_aligned_buffer_64(row); - free_aligned_buffer_64(row_argb); + return 0; } #endif @@ -883,7 +949,7 @@ static void ScaleARGBSimple(int src_width, int y, int dy) { int j; - void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; (void)src_height; @@ -926,7 +992,7 @@ static void ScaleARGBSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride, + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride, dst_width, x, dx); dst_argb += dst_stride; y += dy; @@ -936,19 +1002,19 @@ static void ScaleARGBSimple(int src_width, // ScaleARGB a ARGB. // This function in turn calls a scaling function // suitable for handling the desired resolutions. 
-static void ScaleARGB(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - int clip_x, - int clip_y, - int clip_width, - int clip_height, - enum FilterMode filtering) { +static int ScaleARGB(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -962,7 +1028,7 @@ static void ScaleARGB(const uint8_t* src, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (intptr_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -977,7 +1043,7 @@ static void ScaleARGB(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (int64_t)src_stride; + src += (clipf >> 16) * (intptr_t)src_stride; dst += clip_y * dst_stride; } @@ -993,27 +1059,27 @@ static void ScaleARGB(const uint8_t* src, ScaleARGBDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. - ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy); - return; + return ScaleARGBDown4Box(src_width, src_height, clip_width, + clip_height, src_stride, dst_stride, src, + dst, x, dx, y, dy); } ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } // Optimized odd scale down. ie 3, 5, 7, 9x. 
if ((dx & 0x10000) && (dy & 0x10000)) { filtering = kFilterNone; if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4, + ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); - return; + return 0; } } } @@ -1022,22 +1088,21 @@ static void ScaleARGB(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering); - return; + return 0; } if (filtering && dy < 65536) { - ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } if (filtering) { - ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); + return 0; } LIBYUV_API @@ -1061,10 +1126,9 @@ int ARGBScaleClip(const uint8_t* src_argb, (clip_y + clip_height) > dst_height) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, - dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width, - clip_height, filtering); - return 0; + return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, clip_x, clip_y, + clip_width, clip_height, filtering); } // Scale an ARGB image. 
@@ -1082,10 +1146,9 @@ int ARGBScale(const uint8_t* src_argb, src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, - dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height, - filtering); - return 0; + return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, + dst_height, filtering); } // Scale with YUV conversion to ARGB and clipping. @@ -1109,8 +1172,11 @@ int YUVToARGBScaleClip(const uint8_t* src_y, int clip_width, int clip_height, enum FilterMode filtering) { - uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); int r; + uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); + if (!argb_buffer) { + return 1; // Out of memory runtime error. + } (void)src_fourcc; // TODO(fbarchard): implement and/or assert. (void)dst_fourcc; I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, diff --git a/files/source/scale_common.cc b/source/scale_common.cc index b02bdafd..d07a39af 100644 --- a/files/source/scale_common.cc +++ b/source/scale_common.cc @@ -23,6 +23,25 @@ namespace libyuv { extern "C" { #endif +#ifdef __cplusplus +#define STATIC_CAST(type, expr) static_cast<type>(expr) +#else +#define STATIC_CAST(type, expr) (type)(expr) +#endif + +// TODO(fbarchard): make clamp255 preserve negative values. +static __inline int32_t clamp255(int32_t v) { + return (-(v >= 255) | v) & 255; +} + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +// TODO(fbarchard): change scale to bits +#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16) + static __inline int Abs(int v) { return v >= 0 ? 
v : -v; } @@ -62,6 +81,50 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr, } } +void ScaleRowDown2_16To8_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale) { + int x; + (void)src_stride; + assert(scale >= 256); + assert(scale <= 32768); + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); + dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale)); + dst += 2; + src_ptr += 4; + } + if (dst_width & 1) { + dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); + } +} + +void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale) { + int x; + (void)src_stride; + assert(scale >= 256); + assert(scale <= 32768); + dst_width -= 1; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); + dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale)); + dst += 2; + src_ptr += 4; + } + if (dst_width & 1) { + dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale)); + dst += 1; + src_ptr += 2; + } + dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale)); +} + void ScaleRowDown2Linear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -98,6 +161,52 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, } } +void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale) { + const uint16_t* s = src_ptr; + int x; + (void)src_stride; + assert(scale >= 256); + assert(scale <= 32768); + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); + dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale)); + dst += 2; + s += 4; + } + if (dst_width & 1) { + dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); + } +} + +void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* 
dst, + int dst_width, + int scale) { + const uint16_t* s = src_ptr; + int x; + (void)src_stride; + assert(scale >= 256); + assert(scale <= 32768); + dst_width -= 1; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); + dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale)); + dst += 2; + s += 4; + } + if (dst_width & 1) { + dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale)); + dst += 1; + s += 2; + } + dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale)); +} + void ScaleRowDown2Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -160,6 +269,61 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, } } +void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale) { + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; + int x; + assert(scale >= 256); + assert(scale <= 32768); + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = STATIC_CAST(uint8_t, + C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); + dst[1] = STATIC_CAST(uint8_t, + C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale)); + dst += 2; + s += 4; + t += 4; + } + if (dst_width & 1) { + dst[0] = STATIC_CAST(uint8_t, + C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); + } +} + +void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width, + int scale) { + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; + int x; + assert(scale >= 256); + assert(scale <= 32768); + dst_width -= 1; + for (x = 0; x < dst_width - 1; x += 2) { + dst[0] = STATIC_CAST(uint8_t, + C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale)); + dst[1] = STATIC_CAST(uint8_t, + C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale)); + dst += 2; + s += 4; + t += 4; + } + if (dst_width & 1) { + dst[0] = STATIC_CAST(uint8_t, + C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, 
scale)); + dst += 1; + s += 2; + t += 2; + } + dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale)); +} + void ScaleRowDown4_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -1116,18 +1280,13 @@ void ScaleUVRowDown2_C(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) { - const uint16_t* src = (const uint16_t*)(src_uv); - uint16_t* dst = (uint16_t*)(dst_uv); int x; (void)src_stride; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = src[1]; - dst[1] = src[3]; - src += 2; - dst += 2; - } - if (dst_width & 1) { - dst[0] = src[1]; + for (x = 0; x < dst_width; ++x) { + dst_uv[0] = src_uv[2]; // Store the 2nd UV + dst_uv[1] = src_uv[3]; + src_uv += 4; + dst_uv += 2; } } @@ -1469,7 +1628,7 @@ void ScalePlaneVertical(int src_height, enum FilterMode filtering) { // TODO(fbarchard): Allow higher bpp. int dst_width_bytes = dst_width * bpp; - void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; @@ -1519,6 +1678,12 @@ void ScalePlaneVertical(int src_height, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif + for (j = 0; j < dst_height; ++j) { int yi; int yf; @@ -1548,7 +1713,7 @@ void ScalePlaneVertical_16(int src_height, enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; - void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb, + void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; const int max_y = (src_height > 1) ? 
((src_height - 1) << 16) - 1 : 0; @@ -1627,7 +1792,7 @@ void ScalePlaneVertical_16To8(int src_height, // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions. - void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb, + void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb, ptrdiff_t src_stride, int scale, int dst_width, int source_y_fraction) = InterpolateRow_16To8_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; @@ -1799,35 +1964,6 @@ void ScaleSlope(int src_width, } #undef CENTERSTART -// Read 8x2 upsample with filtering and write 16x1. -// actually reads an extra pixel, so 9x2. -void ScaleRowUp2_16_C(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* src2 = src_ptr + src_stride; - - int x; - for (x = 0; x < dst_width - 1; x += 2) { - uint16_t p0 = src_ptr[0]; - uint16_t p1 = src_ptr[1]; - uint16_t p2 = src2[0]; - uint16_t p3 = src2[1]; - dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; - dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4; - ++src_ptr; - ++src2; - dst += 2; - } - if (dst_width & 1) { - uint16_t p0 = src_ptr[0]; - uint16_t p1 = src_ptr[1]; - uint16_t p2 = src2[0]; - uint16_t p3 = src2[1]; - dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; - } -} - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/files/source/scale_gcc.cc b/source/scale_gcc.cc index edaf2e29..17eeffad 100644 --- a/files/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -1094,7 +1094,8 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr, : "r"((intptr_t)(src_stride)), // %3 "r"((intptr_t)(dst_stride)), // %4 "m"(kLinearShuffleFar) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif @@ -1294,7 +1295,7 @@ void 
ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr, "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "m"(kLinearMadd31) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif diff --git a/files/source/scale_lsx.cc b/source/scale_lsx.cc index bfe5e9fb..bfe5e9fb 100644 --- a/files/source/scale_lsx.cc +++ b/source/scale_lsx.cc diff --git a/files/source/scale_msa.cc b/source/scale_msa.cc index 482a521f..482a521f 100644 --- a/files/source/scale_msa.cc +++ b/source/scale_msa.cc diff --git a/files/source/scale_neon.cc b/source/scale_neon.cc index 6a0d6e1b..ccc75106 100644 --- a/files/source/scale_neon.cc +++ b/source/scale_neon.cc @@ -1428,6 +1428,45 @@ void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, #undef LOAD2_DATA32_LANE +void ScaleUVRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld2.16 {d0, d2}, [%0]! \n" // load 8 UV pixels. + "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst1.16 {q1}, [%1]! \n" // store 8 UV + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "q0", "q1"); +} + +void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld2.16 {d0, d2}, [%0]! \n" // load 8 UV pixels. + "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV + "subs %2, %2, #8 \n" // 8 processed per loop. + "vrhadd.u8 q0, q0, q1 \n" // rounding half add + "vst1.16 {q0}, [%1]! 
\n" // store 8 UV + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "q0", "q1"); +} + void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, diff --git a/files/source/scale_neon64.cc b/source/scale_neon64.cc index 9f9636e6..7c072380 100644 --- a/files/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -1118,101 +1118,6 @@ void ScaleFilterCols_NEON(uint8_t* dst_ptr, #undef LOAD2_DATA8_LANE -// 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y_fraction = 256 - source_y_fraction; - asm volatile( - "cmp %w4, #0 \n" - "b.eq 100f \n" - "add %2, %2, %1 \n" - "cmp %w4, #64 \n" - "b.eq 75f \n" - "cmp %w4, #128 \n" - "b.eq 50f \n" - "cmp %w4, #192 \n" - "b.eq 25f \n" - - "dup v5.8b, %w4 \n" - "dup v4.8b, %w5 \n" - // General purpose row blend. - "1: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "umull v6.8h, v0.8b, v4.8b \n" - "umull2 v7.8h, v0.16b, v4.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "umlal v6.8h, v1.8b, v5.8b \n" - "umlal2 v7.8h, v1.16b, v5.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "rshrn v0.8b, v6.8h, #8 \n" - "rshrn2 v0.16b, v7.8h, #8 \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 1b \n" - "b 99f \n" - - // Blend 25 / 75. - "25: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 25b \n" - "b 99f \n" - - // Blend 50 / 50. 
- "50: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 50b \n" - "b 99f \n" - - // Blend 75 / 25. - "75: \n" - "ld1 {v1.16b}, [%1], #16 \n" - "ld1 {v0.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 75b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "subs %w3, %w3, #16 \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 100b \n" - - "99: \n" - "st1 {v0.b}[15], [%0] \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction), // %4 - "+r"(y_fraction) // %5 - : - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"); -} - void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, @@ -1568,6 +1473,45 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, ); } +void ScaleUVRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "ld2 {v0.8h,v1.8h}, [%0], #32 \n" // load 16 UV + "subs %w2, %w2, #8 \n" // 8 processed per loop. 
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead + "st1 {v1.8h}, [%1], #16 \n" // store 8 UV + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "v0", "v1"); +} + +void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "ld2 {v0.8h,v1.8h}, [%0], #32 \n" // load 16 UV + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add + "prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead + "st1 {v0.8h}, [%1], #16 \n" // store 8 UV + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "v0", "v1"); +} + void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, diff --git a/files/source/scale_rgb.cc b/source/scale_rgb.cc index 8db59b56..8db59b56 100644 --- a/files/source/scale_rgb.cc +++ b/source/scale_rgb.cc diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc new file mode 100644 index 00000000..de037e45 --- /dev/null +++ b/source/scale_rvv.cc @@ -0,0 +1,1040 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Copyright (c) 2023 SiFive, Inc. All rights reserved. + * + * Contributed by Darren Hsieh <darren.hsieh@sifive.com> + * Contributed by Bruce Lai <bruce.lai@sifive.com> + */ + +#include "libyuv/row.h" +#include "libyuv/scale_row.h" + +// This module is for clang rvv. GCC hasn't supported segment load & store. 
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \ + defined(__clang__) +#include <assert.h> +#include <riscv_vector.h> +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#ifdef HAS_SCALEADDROW_RVV +void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { + size_t w = (size_t)src_width; + do { + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_src = __riscv_vle8_v_u8m4(src_ptr, vl); + vuint16m8_t v_dst = __riscv_vle16_v_u16m8(dst_ptr, vl); + // Use widening multiply-add instead of widening + add + v_dst = __riscv_vwmaccu_vx_u16m8(v_dst, 1, v_src, vl); + __riscv_vse16_v_u16m8(dst_ptr, v_dst, vl); + w -= vl; + src_ptr += vl; + dst_ptr += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWN2_RVV +void ScaleARGBRowDown2_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + size_t w = (size_t)dst_width; + const uint64_t* src = (const uint64_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); + do { + size_t vl = __riscv_vsetvl_e64m8(w); + vuint64m8_t v_data = __riscv_vle64_v_u64m8(src, vl); + vuint32m4_t v_dst = __riscv_vnsrl_wx_u32m4(v_data, 32, vl); + __riscv_vse32_v_u32m4(dst, v_dst, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWN2LINEAR_RVV +void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + size_t w = (size_t)dst_width; + const uint32_t* src = (const uint32_t*)(src_argb); + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_odd, v_even, v_dst; + vuint32m4_t v_odd_32, v_even_32; + size_t vl = __riscv_vsetvl_e32m4(w); + __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl); + v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32); + v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32); + // Use round-to-nearest-up mode for averaging add + v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4); + __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4); + w -= vl; + src += vl * 2; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWN2BOX_RVV +void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src0 = (const uint32_t*)(src_argb); + const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride); + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst; + vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16; + vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32; + size_t vl = __riscv_vsetvl_e32m4(w); + __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl); + __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl); + v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32); + v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32); + v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32); + v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32); + v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4); + v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4); + v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4); + __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4); + w -= vl; + src0 += vl * 2; + src1 += vl * 2; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWNEVEN_RVV +void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); + const int stride_byte = src_stepx * 4; + do { + size_t vl = __riscv_vsetvl_e32m8(w); + vuint32m8_t v_row = __riscv_vlse32_v_u32m8(src, stride_byte, vl); + __riscv_vse32_v_u32m8(dst, v_row, vl); + w -= vl; + src += vl * src_stepx; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV +void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src0 = (const uint32_t*)(src_argb); + const 
uint32_t* src1 = (const uint32_t*)(src_argb + src_stride); + const int stride_byte = src_stepx * 4; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst; + vuint16m8_t v_row0_sum, v_row1_sum, v_sum; + vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32; + size_t vl = __riscv_vsetvl_e32m4(w); + __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0, + stride_byte, vl); + __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1, + stride_byte, vl); + v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32); + v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32); + v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32); + v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32); + v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4); + v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4); + v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4); + __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4); + w -= vl; + src0 += vl * src_stepx; + src1 += vl * src_stepx; + dst_argb += vl * 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN2_RVV +void ScaleRowDown2_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + size_t w = (size_t)dst_width; + const uint16_t* src = (const uint16_t*)src_ptr; + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e16m8(w); + vuint16m8_t v_src = __riscv_vle16_v_u16m8(src, vl); + vuint8m4_t v_dst = __riscv_vnsrl_wx_u8m4(v_src, 8, vl); + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN2LINEAR_RVV +void ScaleRowDown2Linear_RVV(const 
uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + size_t w = (size_t)dst_width; + (void)src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_s0, v_s1, v_dst; + size_t vl = __riscv_vsetvl_e8m4(w); + __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl); + // Use round-to-nearest-up mode for averaging add + v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl); + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + src_ptr += 2 * vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN2BOX_RVV +void ScaleRowDown2Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + size_t w = (size_t)dst_width; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + size_t vl = __riscv_vsetvl_e8m4(w); + vuint8m4_t v_s0, v_s1, v_t0, v_t1; + vuint16m8_t v_s01, v_t01, v_st01; + vuint8m4_t v_dst; + __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl); + __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl); + v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl); + v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl); + v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl); + __riscv_vse8_v_u8m4(dst, v_dst, vl); + w -= vl; + s += 2 * vl; + t += 2 * vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN4_RVV +void ScaleRowDown4_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width; + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e8m2(w); + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl); + __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl); + w -= vl; + src_ptr += (4 * vl); + dst_ptr += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN4BOX_RVV +void ScaleRowDown4Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + const uint8_t* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr2 = src_ptr + src_stride * 2; + const uint8_t* src_ptr3 = src_ptr + src_stride * 3; + size_t w = (size_t)dst_width; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + vuint8m2_t v_t0, v_t1, v_t2, v_t3; + vuint8m2_t v_u0, v_u1, v_u2, v_u3; + vuint8m2_t v_v0, v_v1, v_v2, v_v3; + vuint16m4_t v_s01, v_s23, v_t01, v_t23; + vuint16m4_t v_u01, v_u23, v_v01, v_v23; + vuint16m4_t v_st01, v_st23, v_uv01, v_uv23; + vuint16m4_t v_st0123, v_uv0123, v_stuv0123; + vuint8m2_t v_dst; + size_t vl = __riscv_vsetvl_e8m2(w); + + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl); + v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl); + + __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl); + v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl); + + __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl); + v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl); + v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl); + + v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl); + v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl); + v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl); + v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl); + + __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl); + + v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl); + v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl); + + v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl); + v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl); + + v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl); + v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl); + v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl); + // Use round-to-nearest-up mode for vnclip + v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl); + __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl); + w -= vl; + src_ptr += 4 * vl; + src_ptr1 += 4 * vl; + src_ptr2 += 4 * vl; + src_ptr3 += 4 * vl; + dst_ptr += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN34_RVV +void ScaleRowDown34_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + do { + size_t vl = 
__riscv_vsetvl_e8m2(w); + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl); + __riscv_vsseg3e8_v_u8m2(dst_ptr, v_s0, v_s1, v_s3, vl); + w -= vl; + src_ptr += 4 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN34_0_BOX_RVV +void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + vuint16m4_t v_t0_u16, v_t1_u16, v_t2_u16, v_t3_u16; + vuint8m2_t v_u0, v_u1, v_u2, v_u3; + vuint16m4_t v_u1_u16; + vuint8m2_t v_a0, v_a1, v_a2; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl); + + if (src_stride == 0) { + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl); + v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_s2, 2, vl); + v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_s3, 2, vl); + } else { + vuint8m2_t v_t0, v_t1, v_t2, v_t3; + __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl); + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 0, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 0, vl); + v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_t2, 0, vl); + v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_t3, 0, vl); + t += 4 * vl; + } + + v_t0_u16 = __riscv_vwmaccu_vx_u16m4(v_t0_u16, 3, v_s0, vl); + v_t1_u16 = __riscv_vwmaccu_vx_u16m4(v_t1_u16, 3, v_s1, vl); + v_t2_u16 = __riscv_vwmaccu_vx_u16m4(v_t2_u16, 3, v_s2, vl); + v_t3_u16 = __riscv_vwmaccu_vx_u16m4(v_t3_u16, 3, v_s3, vl); + + // Use round-to-nearest-up mode for vnclip & averaging add + v_u0 = __riscv_vnclipu_wx_u8m2(v_t0_u16, 2, vl); + v_u1 = __riscv_vnclipu_wx_u8m2(v_t1_u16, 2, vl); + v_u2 = 
__riscv_vnclipu_wx_u8m2(v_t2_u16, 2, vl); + v_u3 = __riscv_vnclipu_wx_u8m2(v_t3_u16, 2, vl); + + // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u1, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u0, vl); + v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1 + v_a1 = __riscv_vaaddu_vv_u8m2(v_u1, v_u2, vl); + + // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u2, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u3, vl); + v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl); + + w -= vl; + s += 4 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN34_1_BOX_RVV +void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_s0, v_s1, v_s2, v_s3; + vuint8m2_t v_ave0, v_ave1, v_ave2, v_ave3; + vuint16m4_t v_u1_u16; + vuint8m2_t v_a0, v_a1, v_a2; + size_t vl = __riscv_vsetvl_e8m2(w); + __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl); + + // Use round-to-nearest-up mode for vnclip & averaging add + if (src_stride == 0) { + v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_s0, vl); + v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_s1, vl); + v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_s2, vl); + v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_s3, vl); + } else { + vuint8m2_t v_t0, v_t1, v_t2, v_t3; + __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl); + v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_t0, vl); + v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_t1, vl); + v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_t2, vl); + v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_t3, vl); + t += 4 * vl; + } + // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave1, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave0, vl); + v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1 + v_a1 = __riscv_vaaddu_vv_u8m2(v_ave1, v_ave2, vl); + + // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2 + v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave2, 0, vl); + v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave3, vl); + v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl); + + __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl); + + w -= vl; + s += 4 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN38_RVV +void ScaleRowDown38_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + (void)src_stride; + assert(dst_width % 3 == 0); + do { + vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7; + size_t vl = __riscv_vsetvl_e8m1(w); + __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6, + &v_s7, src_ptr, vl); 
+ __riscv_vsseg3e8_v_u8m1(dst_ptr, v_s0, v_s3, v_s6, vl); + w -= vl; + src_ptr += 8 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN38_2_BOX_RVV +void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint16_t coeff_a = (65536u / 6u); + const uint16_t coeff_b = (65536u / 4u); + assert((dst_width % 3 == 0) && (dst_width > 0)); + do { + vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7; + vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7; + vuint16m2_t v_e0, v_e1, v_e2, v_e; + vuint16m2_t v_f0, v_f1, v_f2, v_f; + vuint16m2_t v_g0, v_g1, v_g; + vuint8m1_t v_dst_e, v_dst_f, v_dst_g; + size_t vl = __riscv_vsetvl_e8m1(w); + // s: e00, e10, e20, f00, f10, f20, g00, g10 + // t: e01, e11, e21, f01, f11, f21, g01, g11 + __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6, + &v_s7, src_ptr, vl); + __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6, + &v_t7, src_ptr + src_stride, vl); + // Calculate sum of [e00, e21] to v_e + // Calculate sum of [f00, f21] to v_f + // Calculate sum of [g00, g11] to v_g + v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl); + v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl); + v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl); + v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl); + v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl); + v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl); + v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl); + v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl); + + v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl); + v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl); + v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl); + v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl); + v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl); + + // Average in 16-bit fixed-point + v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl); + v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl); + v_g = 
__riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl); + + v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl); + v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl); + v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl); + + __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl); + w -= vl; + src_ptr += 8 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEROWDOWN38_3_BOX_RVV +void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + size_t w = (size_t)dst_width / 3u; + const uint16_t coeff_a = (65536u / 9u); + const uint16_t coeff_b = (65536u / 6u); + assert((dst_width % 3 == 0) && (dst_width > 0)); + do { + vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7; + vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7; + vuint8m1_t v_u0, v_u1, v_u2, v_u3, v_u4, v_u5, v_u6, v_u7; + vuint16m2_t v_e0, v_e1, v_e2, v_e3, v_e4, v_e; + vuint16m2_t v_f0, v_f1, v_f2, v_f3, v_f4, v_f; + vuint16m2_t v_g0, v_g1, v_g2, v_g; + vuint8m1_t v_dst_e, v_dst_f, v_dst_g; + size_t vl = __riscv_vsetvl_e8m1(w); + // s: e00, e10, e20, f00, f10, f20, g00, g10 + // t: e01, e11, e21, f01, f11, f21, g01, g11 + // u: e02, e12, e22, f02, f12, f22, g02, g12 + __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6, + &v_s7, src_ptr, vl); + __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6, + &v_t7, src_ptr + src_stride, vl); + __riscv_vlseg8e8_v_u8m1(&v_u0, &v_u1, &v_u2, &v_u3, &v_u4, &v_u5, &v_u6, + &v_u7, src_ptr + 2 * src_stride, vl); + // Calculate sum of [e00, e22] + v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl); + v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl); + v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl); + v_e3 = __riscv_vwaddu_vv_u16m2(v_u0, v_u1, vl); + v_e4 = __riscv_vwaddu_vx_u16m2(v_u2, 0, vl); + + v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl); + v_e2 = __riscv_vadd_vv_u16m2(v_e2, v_e3, vl); + v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e4, vl); + v_e = __riscv_vadd_vv_u16m2(v_e0, 
v_e2, vl); + // Calculate sum of [f00, f22] + v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl); + v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl); + v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl); + v_f3 = __riscv_vwaddu_vv_u16m2(v_u3, v_u4, vl); + v_f4 = __riscv_vwaddu_vx_u16m2(v_u5, 0, vl); + + v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl); + v_f2 = __riscv_vadd_vv_u16m2(v_f2, v_f3, vl); + v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f4, vl); + v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl); + // Calculate sum of [g00, g12] + v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl); + v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl); + v_g2 = __riscv_vwaddu_vv_u16m2(v_u6, v_u7, vl); + + v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl); + v_g = __riscv_vadd_vv_u16m2(v_g, v_g2, vl); + + // Average in 16-bit fixed-point + v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl); + v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl); + v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl); + + v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl); + v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl); + v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl); + __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl); + w -= vl; + src_ptr += 8 * vl; + dst_ptr += 3 * vl; + } while (w > 0); +} +#endif + +// ScaleUVRowUp2_(Bi)linear_RVV function is equal to other platforms' +// ScaleRowUp2_(Bi)linear_Any_XXX. We process entire row in this function. Other +// platforms only implement non-edge part of image and process edge with scalar. 
+ +#ifdef HAS_SCALEROWUP2_LINEAR_RVV +void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width) { + size_t work_width = (size_t)dst_width - 1u; + size_t src_width = work_width >> 1u; + const uint8_t* work_src_ptr = src_ptr; + uint8_t* work_dst_ptr = dst_ptr + 1; + size_t vl = __riscv_vsetvlmax_e8m4(); + vuint8m4_t v_3 = __riscv_vmv_v_x_u8m4(3, vl); + dst_ptr[0] = src_ptr[0]; + while (src_width > 0) { + vuint8m4_t v_src0, v_src1, v_dst_odd, v_dst_even; + vuint16m8_t v_src0_u16, v_src1_u16; + size_t vl = __riscv_vsetvl_e8m4(src_width); + v_src0 = __riscv_vle8_v_u8m4(work_src_ptr, vl); + v_src1 = __riscv_vle8_v_u8m4(work_src_ptr + 1, vl); + + v_src0_u16 = __riscv_vwaddu_vx_u16m8(v_src0, 2, vl); + v_src1_u16 = __riscv_vwaddu_vx_u16m8(v_src1, 2, vl); + v_src0_u16 = __riscv_vwmaccu_vv_u16m8(v_src0_u16, v_3, v_src1, vl); + v_src1_u16 = __riscv_vwmaccu_vv_u16m8(v_src1_u16, v_3, v_src0, vl); + + v_dst_odd = __riscv_vnsrl_wx_u8m4(v_src0_u16, 2, vl); + v_dst_even = __riscv_vnsrl_wx_u8m4(v_src1_u16, 2, vl); + + __riscv_vsseg2e8_v_u8m4(work_dst_ptr, v_dst_even, v_dst_odd, vl); + + src_width -= vl; + work_src_ptr += vl; + work_dst_ptr += 2 * vl; + } + dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; +} +#endif + +#ifdef HAS_SCALEROWUP2_BILINEAR_RVV +void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width) { + size_t work_width = ((size_t)dst_width - 1u) & ~1u; + size_t src_width = work_width >> 1u; + const uint8_t* work_s = src_ptr; + const uint8_t* work_t = src_ptr + src_stride; + const uint8_t* s = work_s; + const uint8_t* t = work_t; + uint8_t* d = dst_ptr; + uint8_t* e = dst_ptr + dst_stride; + uint8_t* work_d = d + 1; + uint8_t* work_e = e + 1; + size_t vl = __riscv_vsetvlmax_e16m4(); + vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl); + vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl); + d[0] = (3 * s[0] + t[0] + 2) >> 2; + e[0] = (s[0] + 3 * t[0] + 2) >> 
2; + while (src_width > 0) { + vuint8m2_t v_s0, v_s1, v_t0, v_t1; + vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16; + vuint16m4_t v_t0_u16_, v_t1_u16_; + vuint8m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd; + size_t vl = __riscv_vsetvl_e8m2(src_width); + v_s0 = __riscv_vle8_v_u8m2(work_s, vl); + v_s1 = __riscv_vle8_v_u8m2(work_s + 1, vl); + + v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl); + v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl); + v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl); + v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl); + + v_t0 = __riscv_vle8_v_u8m2(work_t, vl); + v_t1 = __riscv_vle8_v_u8m2(work_t + 1, vl); + + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl); + v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl); + v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl); + + v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl); + v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl); + + v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl); + v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl); + v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl); + v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl); + + v_dst0_odd = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl); + v_dst0_even = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl); + v_dst1_odd = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl); + v_dst1_even = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl); + + __riscv_vsseg2e8_v_u8m2(work_d, v_dst0_even, v_dst0_odd, vl); + __riscv_vsseg2e8_v_u8m2(work_e, v_dst1_even, v_dst1_odd, vl); + + src_width -= vl; + work_s += vl; + work_t += vl; + work_d += 2 * vl; + work_e += 2 * vl; + } + d[dst_width - 1] = + (3 * s[(dst_width - 1) / 2] + t[(dst_width - 1) / 2] + 2) >> 2; + e[dst_width - 1] = + (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2; +} +#endif + +#ifdef HAS_SCALEUVROWDOWN2_RVV +void 
ScaleUVRowDown2_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width) { + size_t w = (size_t)dst_width; + const uint32_t* src = (const uint32_t*)src_uv; + uint16_t* dst = (uint16_t*)dst_uv; + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e32m8(w); + vuint32m8_t v_data = __riscv_vle32_v_u32m8(src, vl); + vuint16m4_t v_u1v1 = __riscv_vnsrl_wx_u16m4(v_data, 16, vl); + __riscv_vse16_v_u16m4(dst, v_u1v1, vl); + w -= vl; + src += vl; + dst += vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWN2LINEAR_RVV +void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width) { + size_t w = (size_t)dst_width; + const uint16_t* src = (const uint16_t*)src_uv; + (void)src_stride; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). + asm volatile("csrwi vxrm, 0"); + do { + vuint8m4_t v_u0v0, v_u1v1, v_avg; + vuint16m4_t v_u0v0_16, v_u1v1_16; + size_t vl = __riscv_vsetvl_e16m4(w); + __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl); + v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16); + v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16); + // Use round-to-nearest-up mode for averaging add + v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2); + __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2); + w -= vl; + src += vl * 2; + dst_uv += vl * 2; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWN2BOX_RVV +void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width) { + const uint8_t* src_uv_row1 = src_uv + src_stride; + size_t w = (size_t)dst_width; + // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode + // register) is set to round-to-nearest-up mode(0). 
+ asm volatile("csrwi vxrm, 0"); + do { + vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0; + vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1; + vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1; + vuint16m4_t v_sum0, v_sum1; + vuint8m2_t v_dst_u, v_dst_v; + size_t vl = __riscv_vsetvl_e8m2(w); + + __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0, + src_uv, vl); + __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1, + src_uv_row1, vl); + + v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl); + v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl); + v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl); + v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl); + + v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl); + v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl); + // Use round-to-nearest-up mode for vnclip + v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl); + v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl); + + __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl); + + dst_uv += 2 * vl; + src_uv += 4 * vl; + w -= vl; + src_uv_row1 += 4 * vl; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWN4_RVV +void ScaleUVRowDown4_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width) { + // Overflow will never happen here, since sizeof(size_t)/sizeof(int)=2. + // dst_width = src_width / 4 and src_width is also int. 
+ size_t w = (size_t)dst_width * 8; + (void)src_stride; + (void)src_stepx; + do { + size_t vl = __riscv_vsetvl_e8m8(w); + vuint8m8_t v_row = __riscv_vle8_v_u8m8(src_uv, vl); + vuint64m8_t v_row_64 = __riscv_vreinterpret_v_u8m8_u64m8(v_row); + // Narrowing without clipping + vuint32m4_t v_tmp = __riscv_vncvt_x_x_w_u32m4(v_row_64, vl / 8); + vuint16m2_t v_dst_16 = __riscv_vncvt_x_x_w_u16m2(v_tmp, vl / 8); + vuint8m2_t v_dst = __riscv_vreinterpret_v_u16m2_u8m2(v_dst_16); + __riscv_vse8_v_u8m2(dst_uv, v_dst, vl / 4); + w -= vl; + src_uv += vl; + dst_uv += vl / 4; + } while (w > 0); +} +#endif + +#ifdef HAS_SCALEUVROWDOWNEVEN_RVV +void ScaleUVRowDownEven_RVV(const uint8_t* src_uv, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width) { + size_t w = (size_t)dst_width; + const ptrdiff_t stride_byte = (ptrdiff_t)src_stepx * 2; + const uint16_t* src = (const uint16_t*)(src_uv); + uint16_t* dst = (uint16_t*)(dst_uv); + (void)src_stride; + do { + size_t vl = __riscv_vsetvl_e16m8(w); + vuint16m8_t v_row = __riscv_vlse16_v_u16m8(src, stride_byte, vl); + __riscv_vse16_v_u16m8(dst, v_row, vl); + w -= vl; + src += vl * src_stepx; + dst += vl; + } while (w > 0); +} +#endif + +// ScaleUVRowUp2_(Bi)linear_RVV function is equal to other platforms' +// ScaleUVRowUp2_(Bi)linear_Any_XXX. We process entire row in this function. +// Other platforms only implement non-edge part of image and process edge with +// scalar. 
+ +#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV +void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width) { + size_t work_width = ((size_t)dst_width - 1u) & ~1u; + uint16_t* work_dst_ptr = (uint16_t*)dst_ptr + 1; + const uint8_t* work_src_ptr = src_ptr; + size_t vl = __riscv_vsetvlmax_e8m4(); + vuint8m4_t v_3_u8 = __riscv_vmv_v_x_u8m4(3, vl); + dst_ptr[0] = src_ptr[0]; + dst_ptr[1] = src_ptr[1]; + while (work_width > 0) { + vuint8m4_t v_uv0, v_uv1, v_dst_odd_u8, v_dst_even_u8; + vuint16m4_t v_dst_odd, v_dst_even; + vuint16m8_t v_uv0_u16, v_uv1_u16; + size_t vl = __riscv_vsetvl_e8m4(work_width); + v_uv0 = __riscv_vle8_v_u8m4(work_src_ptr, vl); + v_uv1 = __riscv_vle8_v_u8m4(work_src_ptr + 2, vl); + + v_uv0_u16 = __riscv_vwaddu_vx_u16m8(v_uv0, 2, vl); + v_uv1_u16 = __riscv_vwaddu_vx_u16m8(v_uv1, 2, vl); + + v_uv0_u16 = __riscv_vwmaccu_vv_u16m8(v_uv0_u16, v_3_u8, v_uv1, vl); + v_uv1_u16 = __riscv_vwmaccu_vv_u16m8(v_uv1_u16, v_3_u8, v_uv0, vl); + + v_dst_odd_u8 = __riscv_vnsrl_wx_u8m4(v_uv0_u16, 2, vl); + v_dst_even_u8 = __riscv_vnsrl_wx_u8m4(v_uv1_u16, 2, vl); + + v_dst_even = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_even_u8); + v_dst_odd = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_odd_u8); + + __riscv_vsseg2e16_v_u16m4(work_dst_ptr, v_dst_even, v_dst_odd, vl / 2); + + work_width -= vl; + work_src_ptr += vl; + work_dst_ptr += vl; + } + dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; + dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; +} +#endif + +#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV +void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width) { + size_t work_width = ((size_t)dst_width - 1u) & ~1u; + const uint8_t* work_s = src_ptr; + const uint8_t* work_t = src_ptr + src_stride; + const uint8_t* s = work_s; + const uint8_t* t = work_t; + uint8_t* d = dst_ptr; + uint8_t* e = dst_ptr + dst_stride; + uint16_t* work_d = (uint16_t*)d + 
1; + uint16_t* work_e = (uint16_t*)e + 1; + size_t vl = __riscv_vsetvlmax_e16m4(); + vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl); + vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl); + d[0] = (3 * s[0] + t[0] + 2) >> 2; + e[0] = (s[0] + 3 * t[0] + 2) >> 2; + d[1] = (3 * s[1] + t[1] + 2) >> 2; + e[1] = (s[1] + 3 * t[1] + 2) >> 2; + while (work_width > 0) { + vuint8m2_t v_s0, v_s1, v_t0, v_t1; + vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16; + vuint16m4_t v_t0_u16_, v_t1_u16_; + vuint8m2_t v_dst0_odd_u8, v_dst0_even_u8, v_dst1_odd_u8, v_dst1_even_u8; + vuint16m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd; + size_t vl = __riscv_vsetvl_e8m2(work_width); + v_s0 = __riscv_vle8_v_u8m2(work_s, vl); + v_s1 = __riscv_vle8_v_u8m2(work_s + 2, vl); + + v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl); + v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl); + v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl); + v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl); + + v_t0 = __riscv_vle8_v_u8m2(work_t, vl); + v_t1 = __riscv_vle8_v_u8m2(work_t + 2, vl); + + v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl); + v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl); + v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl); + v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl); + + v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl); + v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl); + + v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl); + v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl); + v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl); + v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl); + + v_dst0_odd_u8 = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl); + v_dst0_even_u8 = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl); + v_dst1_odd_u8 = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl); + v_dst1_even_u8 = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl); + + v_dst0_even = 
__riscv_vreinterpret_v_u8m2_u16m2(v_dst0_even_u8); + v_dst0_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_odd_u8); + v_dst1_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_even_u8); + v_dst1_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_odd_u8); + + __riscv_vsseg2e16_v_u16m2(work_d, v_dst0_even, v_dst0_odd, vl / 2); + __riscv_vsseg2e16_v_u16m2(work_e, v_dst1_even, v_dst1_odd, vl / 2); + + work_width -= vl; + work_s += vl; + work_t += vl; + work_d += vl; + work_e += vl; + } + d[2 * dst_width - 2] = + (3 * s[((dst_width + 1) & ~1) - 2] + t[((dst_width + 1) & ~1) - 2] + 2) >> + 2; + e[2 * dst_width - 2] = + (s[((dst_width + 1) & ~1) - 2] + 3 * t[((dst_width + 1) & ~1) - 2] + 2) >> + 2; + d[2 * dst_width - 1] = + (3 * s[((dst_width + 1) & ~1) - 1] + t[((dst_width + 1) & ~1) - 1] + 2) >> + 2; + e[2 * dst_width - 1] = + (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >> + 2; +} +#endif + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && + // defined(__clang__) diff --git a/files/source/scale_uv.cc b/source/scale_uv.cc index 3b3d7b8e..0931c89a 100644 --- a/files/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; } else { - src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2; } #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) @@ -112,6 +112,31 @@ static void ScaleUVDown2(int src_width, } } #endif +#if defined(HAS_SCALEUVROWDOWN2_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleUVRowDown2 = + filtering == kFilterNone + ? ScaleUVRowDown2_Any_NEON + : (filtering == kFilterLinear ? 
ScaleUVRowDown2Linear_Any_NEON + : ScaleUVRowDown2Box_Any_NEON); + if (IS_ALIGNED(dst_width, 8)) { + ScaleUVRowDown2 = + filtering == kFilterNone + ? ScaleUVRowDown2_NEON + : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON + : ScaleUVRowDown2Box_NEON); + } + } +#endif +#if defined(HAS_SCALEUVROWDOWN2_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleUVRowDown2 = + filtering == kFilterNone + ? ScaleUVRowDown2_RVV + : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_RVV + : ScaleUVRowDown2Box_RVV); + } +#endif // This code is not enabled. Only box filter is available at this time. #if defined(HAS_SCALEUVROWDOWN2_SSSE3) @@ -130,23 +155,7 @@ static void ScaleUVDown2(int src_width, } } #endif -// This code is not enabled. Only box filter is available at this time. -#if defined(HAS_SCALEUVROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ScaleUVRowDown2 = - filtering == kFilterNone - ? ScaleUVRowDown2_Any_NEON - : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON - : ScaleUVRowDown2Box_Any_NEON); - if (IS_ALIGNED(dst_width, 8)) { - ScaleUVRowDown2 = - filtering == kFilterNone - ? ScaleUVRowDown2_NEON - : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON - : ScaleUVRowDown2Box_NEON); - } - } -#endif + #if defined(HAS_SCALEUVROWDOWN2_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleUVRowDown2 = @@ -179,28 +188,30 @@ static void ScaleUVDown2(int src_width, // This is an optimized version for scaling down a UV to 1/4 of // its original size. #if HAS_SCALEUVDOWN4BOX -static void ScaleUVDown4Box(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy) { +static int ScaleUVDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of UV. 
- const int kRowSize = (dst_width * 2 * 2 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 * 2 + 15) & ~15; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = ScaleUVRowDown2Box_C; // Advance to odd row, even column. - src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; (void)src_width; (void)src_height; (void)dx; @@ -231,16 +242,22 @@ static void ScaleUVDown4Box(int src_width, } } #endif +#if defined(HAS_SCALEUVROWDOWN2BOX_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ScaleUVRowDown2 = ScaleUVRowDown2Box_RVV; + } +#endif for (j = 0; j < dst_height; ++j) { ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2); - ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize, + ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size, dst_width * 2); - ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width); + ScaleUVRowDown2(row, row_size, dst_uv, dst_width); src_uv += row_stride; dst_uv += dst_stride; } free_aligned_buffer_64(row); + return 0; } #endif // HAS_SCALEUVDOWN4BOX @@ -263,7 +280,7 @@ static void ScaleUVDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - int row_stride = (dy >> 16) * (int64_t)src_stride; + ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride, int src_step, uint8_t* dst_uv, int dst_width) = filtering ? 
ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C; @@ -271,7 +288,7 @@ static void ScaleUVDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3 @@ -310,6 +327,12 @@ static void ScaleUVDownEven(int src_width, } } #endif +#if defined(HAS_SCALEUVROWDOWNEVEN_RVV) + if (TestCpuFlag(kCpuHasRVV) && !filtering) { + ScaleUVRowDownEven = + (col_step == 4) ? ScaleUVRowDown4_RVV : ScaleUVRowDownEven_RVV; + } +#endif if (filtering == kFilterLinear) { src_stride = 0; @@ -324,24 +347,24 @@ static void ScaleUVDownEven(int src_width, // Scale UV down with bilinear interpolation. #if HAS_SCALEUVBILINEARDOWN -static void ScaleUVBilinearDown(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleUVBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv, + void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv, + void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x, int dx) = (src_width >= 32768) ? 
ScaleUVFilterCols64_C : ScaleUVFilterCols_C; int64_t xlast = x + (int64_t)(dst_width - 1) * dx; @@ -397,6 +420,11 @@ static void ScaleUVBilinearDown(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif #if defined(HAS_SCALEUVFILTERCOLS_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleUVFilterCols = ScaleUVFilterCols_SSSE3; @@ -421,15 +449,16 @@ static void ScaleUVBilinearDown(int src_width, // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row of UV. { - align_buffer_64(row, clip_src_width * 2); - const int max_y = (src_height - 1) << 16; + align_buffer_64(row, clip_src_width * 2); + if (!row) + return 1; if (y > max_y) { y = max_y; } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_uv + yi * (int64_t)src_stride; + const uint8_t* src = src_uv + yi * (intptr_t)src_stride; if (filtering == kFilterLinear) { ScaleUVFilterCols(dst_uv, src, dst_width, x, dx); } else { @@ -445,29 +474,30 @@ static void ScaleUVBilinearDown(int src_width, } free_aligned_buffer_64(row); } + return 0; } #endif // Scale UV up with bilinear interpolation. 
#if HAS_SCALEUVBILINEARUP -static void ScaleUVBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { +static int ScaleUVBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv, + void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv, + void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x, int dx) = filtering ? ScaleUVFilterCols_C : ScaleUVCols_C; const int max_y = (src_height - 1) << 16; @@ -511,6 +541,11 @@ static void ScaleUVBilinearUp(int src_width, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif if (src_width >= 32768) { ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C; } @@ -571,14 +606,16 @@ static void ScaleUVBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_uv + yi * (int64_t)src_stride; + const uint8_t* src = src_uv + yi * (intptr_t)src_stride; // Allocate 2 rows of UV. 
- const int kRowSize = (dst_width * 2 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); + const int row_size = (dst_width * 2 + 15) & ~15; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; uint8_t* rowptr = row; - int rowstride = kRowSize; + int rowstride = row_size; int lasty = yi; ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -596,7 +633,7 @@ static void ScaleUVBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_uv + yi * (int64_t)src_stride; + src = src_uv + yi * (intptr_t)src_stride; } if (yi != lasty) { ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -619,6 +656,7 @@ static void ScaleUVBilinearUp(int src_width, } free_aligned_buffer_64(row); } + return 0; } #endif // HAS_SCALEUVBILINEARUP @@ -627,14 +665,14 @@ static void ScaleUVBilinearUp(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // This is used to scale U and V planes of NV16 to NV24. -void ScaleUVLinearUp2(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_uv, - uint8_t* dst_uv) { +static void ScaleUVLinearUp2(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_uv, + uint8_t* dst_uv) { void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) = ScaleUVRowUp2_Linear_Any_C; int i; @@ -644,32 +682,38 @@ void ScaleUVLinearUp2(int src_width, // This function can only scale up by 2 times horizontally. 
assert(src_width == ((dst_width + 1) / 2)); -#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3 +#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3 if (TestCpuFlag(kCpuHasSSSE3)) { ScaleRowUp = ScaleUVRowUp2_Linear_Any_SSSE3; } #endif -#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2 +#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { ScaleRowUp = ScaleUVRowUp2_Linear_Any_AVX2; } #endif -#ifdef HAS_SCALEUVROWUP2LINEAR_NEON +#ifdef HAS_SCALEUVROWUP2_LINEAR_NEON if (TestCpuFlag(kCpuHasNEON)) { ScaleRowUp = ScaleUVRowUp2_Linear_Any_NEON; } #endif +#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + ScaleRowUp = ScaleUVRowUp2_Linear_RVV; + } +#endif + if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -680,14 +724,14 @@ void ScaleUVLinearUp2(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of NV12 to NV24. 
-void ScaleUVBilinearUp2(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint8_t* src_ptr, - uint8_t* dst_ptr) { +static void ScaleUVBilinearUp2(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_Any_C; @@ -697,24 +741,30 @@ void ScaleUVBilinearUp2(int src_width, assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); -#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3 +#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3 if (TestCpuFlag(kCpuHasSSSE3)) { Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_SSSE3; } #endif -#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2 +#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_AVX2; } #endif -#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON +#ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON if (TestCpuFlag(kCpuHasNEON)) { Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_NEON; } #endif +#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV + if (TestCpuFlag(kCpuHasRVV)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_RVV; + } +#endif + Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width); dst_ptr += dst_stride; for (x = 0; x < src_height - 1; ++x) { @@ -734,14 +784,14 @@ void ScaleUVBilinearUp2(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original width, using linear interpolation. // This is used to scale U and V planes of P210 to P410. 
-void ScaleUVLinearUp2_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_uv, - uint16_t* dst_uv) { +static void ScaleUVLinearUp2_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_uv, + uint16_t* dst_uv) { void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; int i; @@ -751,32 +801,32 @@ void ScaleUVLinearUp2_16(int src_width, // This function can only scale up by 2 times horizontally. assert(src_width == ((dst_width + 1) / 2)); -#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 +#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif -#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 +#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif -#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON +#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -787,14 +837,14 @@ void ScaleUVLinearUp2_16(int src_width, // This is an optimized version for scaling up a plane to 2 times of // its original size, using bilinear interpolation. // This is used to scale U and V planes of P010 to P410. 
-void ScaleUVBilinearUp2_16(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_ptr, - uint16_t* dst_ptr) { +static void ScaleUVBilinearUp2_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; @@ -804,19 +854,19 @@ void ScaleUVBilinearUp2_16(int src_width, assert(src_width == ((dst_width + 1) / 2)); assert(src_height == ((dst_height + 1) / 2)); -#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 +#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif -#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 +#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif -#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON +#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; } @@ -854,7 +904,7 @@ static void ScaleUVSimple(int src_width, int y, int dy) { int j; - void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width, + void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C; (void)src_height; @@ -889,7 +939,7 @@ static void ScaleUVSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x, + ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x, dx); dst_uv += dst_stride; y += dy; @@ -910,7 +960,7 @@ static int UVCopy(const uint8_t* src_uv, // Negative height means invert the image. 
if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -930,7 +980,7 @@ static int UVCopy_16(const uint16_t* src_uv, // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -942,19 +992,19 @@ static int UVCopy_16(const uint16_t* src_uv, // Scale a UV plane (from NV12) // This function in turn calls a scaling function // suitable for handling the desired resolutions. -static void ScaleUV(const uint8_t* src, - int src_stride, - int src_width, - int src_height, - uint8_t* dst, - int dst_stride, - int dst_width, - int dst_height, - int clip_x, - int clip_y, - int clip_width, - int clip_height, - enum FilterMode filtering) { +static int ScaleUV(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, + enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; @@ -968,7 +1018,7 @@ static void ScaleUV(const uint8_t* src, // Negative src_height means invert the image. 
if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (intptr_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -983,7 +1033,7 @@ static void ScaleUV(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (int64_t)src_stride; + src += (clipf >> 16) * (intptr_t)src_stride; dst += clip_y * dst_stride; } @@ -1000,22 +1050,22 @@ static void ScaleUV(const uint8_t* src, ScaleUVDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; } #endif #if HAS_SCALEUVDOWN4BOX if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. - ScaleUVDown4Box(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy); - return; + return ScaleUVDown4Box(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, + dy); } #endif #if HAS_SCALEUVDOWNEVEN ScaleUVDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); - return; + return 0; #endif } // Optimized odd scale down. ie 3, 5, 7, 9x. @@ -1024,9 +1074,9 @@ static void ScaleUV(const uint8_t* src, #ifdef HAS_UVCOPY if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2, + UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2, src_stride, dst, dst_stride, clip_width, clip_height); - return; + return 0; } #endif } @@ -1037,38 +1087,37 @@ static void ScaleUV(const uint8_t* src, // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering); - return; + return 0; } - if (filtering && (dst_width + 1) / 2 == src_width) { + if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) { ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst); - return; + return 0; } if ((clip_height + 1) / 2 == src_height && (clip_width + 1) / 2 == src_width && (filtering == kFilterBilinear || filtering == kFilterBox)) { ScaleUVBilinearUp2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst); - return; + return 0; } #if HAS_SCALEUVBILINEARUP if (filtering && dy < 65536) { - ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } #endif #if HAS_SCALEUVBILINEARDOWN if (filtering) { - ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, x, dx, y, dy, - filtering); - return; + return ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); } #endif ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); + return 0; } // Scale an UV image. @@ -1086,9 +1135,9 @@ int UVScale(const uint8_t* src_uv, src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv, - dst_width, dst_height, 0, 0, dst_width, dst_height, filtering); - return 0; + return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, + dst_stride_uv, dst_width, dst_height, 0, 0, dst_width, + dst_height, filtering); } // Scale a 16 bit UV image. 
@@ -1118,7 +1167,7 @@ int UVScale_16(const uint16_t* src_uv, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv; + src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } src_width = Abs(src_width); @@ -1126,20 +1175,20 @@ int UVScale_16(const uint16_t* src_uv, #ifdef HAS_UVCOPY if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) { if (dst_height == 1) { - UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv, + UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv, src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height); } else { dy = src_height / dst_height; - UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv, - dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width, - dst_height); + UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv, + (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv, + dst_width, dst_height); } return 0; } #endif - if (filtering && (dst_width + 1) / 2 == src_width) { + if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) { ScaleUVLinearUp2_16(src_width, src_height, dst_width, dst_height, src_stride_uv, dst_stride_uv, src_uv, dst_uv); return 0; diff --git a/files/source/scale_win.cc b/source/scale_win.cc index ea1f95c6..ea1f95c6 100644 --- a/files/source/scale_win.cc +++ b/source/scale_win.cc diff --git a/files/source/test.sh b/source/test.sh index 7f12c3c1..7f12c3c1 100755 --- a/files/source/test.sh +++ b/source/test.sh diff --git a/files/source/video_common.cc b/source/video_common.cc index 92384c05..92384c05 100644 --- a/files/source/video_common.cc +++ b/source/video_common.cc diff --git a/tools_libyuv/OWNERS b/tools_libyuv/OWNERS new file mode 100644 index 00000000..aae4fb6e --- /dev/null +++ b/tools_libyuv/OWNERS @@ -0,0 +1,4 @@ +mbonadei@chromium.org +fbarchard@chromium.org 
+pbos@chromium.org + diff --git a/tools_libyuv/autoroller/roll_deps.py b/tools_libyuv/autoroller/roll_deps.py new file mode 100755 index 00000000..d5c1089f --- /dev/null +++ b/tools_libyuv/autoroller/roll_deps.py @@ -0,0 +1,822 @@ +#!/usr/bin/env vpython3 + +# Copyright (c) 2017 The LibYUV project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Script to automatically roll dependencies in the LibYUV DEPS file.""" + + +import argparse +import base64 +import collections +import logging +import os +import re +import subprocess +import sys +import urllib.request + + +def FindSrcDirPath(): + """Returns the abs path to the src/ dir of the project.""" + src_dir = os.path.dirname(os.path.abspath(__file__)) + while os.path.basename(src_dir) != 'src': + src_dir = os.path.normpath(os.path.join(src_dir, os.pardir)) + return src_dir + + +# Skip these dependencies (list without solution name prefix). +DONT_AUTOROLL_THESE = [ + 'third_party/fuchsia-gn-sdk', + 'src/third_party/gflags/src', + 'src/third_party/mockito/src', +] + +# These dependencies are missing in chromium/src/DEPS, either unused or already +# in-tree. For instance, src/base is a part of the Chromium source git repo, +# but we pull it through a subtree mirror, so therefore it isn't listed in +# Chromium's deps but it is in ours. 
+LIBYUV_ONLY_DEPS = [ + 'src/base', + 'src/build', + 'src/buildtools', + 'src/ios', + 'src/testing', + 'src/third_party', + 'src/third_party/android_support_test_runner', + 'src/third_party/bazel', + 'src/third_party/bouncycastle', + 'src/third_party/errorprone/lib', + 'src/third_party/findbugs', + 'src/third_party/gson', + 'src/third_party/gtest-parallel', + 'src/third_party/guava', + 'src/third_party/intellij', + 'src/third_party/jsr-305/src', + 'src/third_party/ow2_asm', + 'src/third_party/proguard', + 'src/third_party/ub-uiautomator/lib', + 'src/tools', + 'src/tools/clang/dsymutil', +] + +LIBYUV_URL = 'https://chromium.googlesource.com/libyuv/libyuv' +CHROMIUM_SRC_URL = 'https://chromium.googlesource.com/chromium/src' +CHROMIUM_COMMIT_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s' +CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s' +CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s' + +COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$') +CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([-0-9a-z]+)\'$') +ROLL_BRANCH_NAME = 'roll_chromium_revision' + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +CHECKOUT_SRC_DIR = FindSrcDirPath() +CHECKOUT_ROOT_DIR = os.path.realpath(os.path.join(CHECKOUT_SRC_DIR, os.pardir)) + +# Copied from tools/android/roll/android_deps/.../BuildConfigGenerator.groovy. +ANDROID_DEPS_START = r'=== ANDROID_DEPS Generated Code Start ===' +ANDROID_DEPS_END = r'=== ANDROID_DEPS Generated Code End ===' +# Location of automically gathered android deps. 
+ANDROID_DEPS_PATH = 'src/third_party/android_deps/' + +sys.path.append(os.path.join(CHECKOUT_SRC_DIR, 'build')) +import find_depot_tools + +find_depot_tools.add_depot_tools_to_path() + +CLANG_UPDATE_SCRIPT_URL_PATH = 'tools/clang/scripts/update.py' +CLANG_UPDATE_SCRIPT_LOCAL_PATH = os.path.join(CHECKOUT_SRC_DIR, 'tools', + 'clang', 'scripts', 'update.py') + +DepsEntry = collections.namedtuple('DepsEntry', 'path url revision') +ChangedDep = collections.namedtuple('ChangedDep', + 'path url current_rev new_rev') +CipdDepsEntry = collections.namedtuple('CipdDepsEntry', 'path packages') +VersionEntry = collections.namedtuple('VersionEntry', 'version') +ChangedCipdPackage = collections.namedtuple( + 'ChangedCipdPackage', 'path package current_version new_version') +ChangedVersionEntry = collections.namedtuple( + 'ChangedVersionEntry', 'path current_version new_version') + +ChromiumRevisionUpdate = collections.namedtuple('ChromiumRevisionUpdate', + ('current_chromium_rev ' + 'new_chromium_rev ')) + + +class RollError(Exception): + pass + + +def StrExpansion(): + return lambda str_value: str_value + + +def VarLookup(local_scope): + return lambda var_name: local_scope['vars'][var_name] + + +def ParseDepsDict(deps_content): + local_scope = {} + global_scope = { + 'Str': StrExpansion(), + 'Var': VarLookup(local_scope), + 'deps_os': {}, + } + exec(deps_content, global_scope, local_scope) + return local_scope + + +def ParseLocalDepsFile(filename): + with open(filename, 'rb') as f: + deps_content = f.read().decode('utf-8') + return ParseDepsDict(deps_content) + + +def ParseCommitPosition(commit_message): + for line in reversed(commit_message.splitlines()): + m = COMMIT_POSITION_RE.match(line.strip()) + if m: + return int(m.group(1)) + logging.error('Failed to parse commit position id from:\n%s\n', + commit_message) + sys.exit(-1) + + +def _RunCommand(command, + working_dir=None, + ignore_exit_code=False, + extra_env=None, + input_data=None): + """Runs a command and returns the 
output from that command. + + If the command fails (exit code != 0), the function will exit the process. + + Returns: + A tuple containing the stdout and stderr outputs as strings. + """ + working_dir = working_dir or CHECKOUT_SRC_DIR + logging.debug('CMD: %s CWD: %s', ' '.join(command), working_dir) + env = os.environ.copy() + if extra_env: + assert all(isinstance(value, str) for value in extra_env.values()) + logging.debug('extra env: %s', extra_env) + env.update(extra_env) + p = subprocess.Popen(command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env, + cwd=working_dir, + universal_newlines=True) + std_output, err_output = p.communicate(input_data) + p.stdout.close() + p.stderr.close() + if not ignore_exit_code and p.returncode != 0: + logging.error('Command failed: %s\n' + 'stdout:\n%s\n' + 'stderr:\n%s\n', ' '.join(command), std_output, err_output) + sys.exit(p.returncode) + return std_output, err_output + + +def _GetBranches(): + """Returns a tuple of active,branches. + + The 'active' is the name of the currently active branch and 'branches' is a + list of all branches. + """ + lines = _RunCommand(['git', 'branch'])[0].split('\n') + branches = [] + active = '' + for line in lines: + if '*' in line: + # The assumption is that the first char will always be the '*'. + active = line[1:].strip() + branches.append(active) + else: + branch = line.strip() + if branch: + branches.append(branch) + return active, branches + + +def _ReadGitilesContent(url): + # Download and decode BASE64 content until + # https://code.google.com/p/gitiles/issues/detail?id=7 is fixed. + base64_content = ReadUrlContent(url + '?format=TEXT') + return base64.b64decode(base64_content[0]).decode('utf-8') + + +def ReadRemoteCrFile(path_below_src, revision): + """Reads a remote Chromium file of a specific revision. + + Args: + path_below_src: A path to the target file relative to src dir. + revision: Revision to read. 
+ Returns: + A string with file content. + """ + return _ReadGitilesContent(CHROMIUM_FILE_TEMPLATE % + (revision, path_below_src)) + + +def ReadRemoteCrCommit(revision): + """Reads a remote Chromium commit message. Returns a string.""" + return _ReadGitilesContent(CHROMIUM_COMMIT_TEMPLATE % revision) + + +def ReadUrlContent(url): + """Connect to a remote host and read the contents. + + Args: + url: URL to connect to. + Returns: + A list of lines. + """ + conn = urllib.request.urlopen(url) + try: + return conn.readlines() + except IOError as e: + logging.exception('Error connecting to %s. Error: %s', url, e) + raise + finally: + conn.close() + + +def GetMatchingDepsEntries(depsentry_dict, dir_path): + """Gets all deps entries matching the provided path. + + This list may contain more than one DepsEntry object. + Example: dir_path='src/testing' would give results containing both + 'src/testing/gtest' and 'src/testing/gmock' deps entries for Chromium's + DEPS. + Example 2: dir_path='src/build' should return 'src/build' but not + 'src/buildtools'. + + Returns: + A list of DepsEntry objects. 
+ """ + result = [] + for path, depsentry in depsentry_dict.items(): + if path == dir_path: + result.append(depsentry) + else: + parts = path.split('/') + if all(part == parts[i] for i, part in enumerate(dir_path.split('/'))): + result.append(depsentry) + return result + + +def BuildDepsentryDict(deps_dict): + """Builds a dict of paths to DepsEntry objects from a raw deps dict.""" + result = {} + + def AddDepsEntries(deps_subdict): + for path, dep in deps_subdict.items(): + if path in result: + continue + if not isinstance(dep, dict): + dep = {'url': dep} + if dep.get('dep_type') == 'cipd': + result[path] = CipdDepsEntry(path, dep['packages']) + else: + if '@' not in dep['url']: + continue + url, revision = dep['url'].split('@') + result[path] = DepsEntry(path, url, revision) + + def AddVersionEntry(vars_subdict): + for key, value in vars_subdict.items(): + if key in result: + continue + if not key.endswith('_version'): + continue + key = re.sub('_version$', '', key) + result[key] = VersionEntry(value) + + AddDepsEntries(deps_dict['deps']) + for deps_os in ['win', 'mac', 'unix', 'android', 'ios', 'unix']: + AddDepsEntries(deps_dict.get('deps_os', {}).get(deps_os, {})) + AddVersionEntry(deps_dict.get('vars', {})) + return result + + +def _FindChangedCipdPackages(path, old_pkgs, new_pkgs): + old_pkgs_names = {p['package'] for p in old_pkgs} + new_pkgs_names = {p['package'] for p in new_pkgs} + pkgs_equal = (old_pkgs_names == new_pkgs_names) + added_pkgs = [p for p in new_pkgs_names if p not in old_pkgs_names] + removed_pkgs = [p for p in old_pkgs_names if p not in new_pkgs_names] + + assert pkgs_equal, ('Old: %s\n New: %s.\nYou need to do a manual roll ' + 'and remove/add entries in DEPS so the old and new ' + 'list match.\nMost likely, you should add \"%s\" and ' + 'remove \"%s\"' % + (old_pkgs, new_pkgs, added_pkgs, removed_pkgs)) + + for old_pkg in old_pkgs: + for new_pkg in new_pkgs: + old_version = old_pkg['version'] + new_version = new_pkg['version'] + if 
(old_pkg['package'] == new_pkg['package'] + and old_version != new_version): + logging.debug('Roll dependency %s to %s', path, new_version) + yield ChangedCipdPackage(path, old_pkg['package'], old_version, + new_version) + + +def _FindChangedVars(name, old_version, new_version): + if old_version != new_version: + logging.debug('Roll dependency %s to %s', name, new_version) + yield ChangedVersionEntry(name, old_version, new_version) + + +def _FindNewDeps(old, new): + """ Gather dependencies only in `new` and return corresponding paths. """ + old_entries = set(BuildDepsentryDict(old)) + new_entries = set(BuildDepsentryDict(new)) + return [ + path for path in new_entries - old_entries + if path not in DONT_AUTOROLL_THESE + ] + + +def FindAddedDeps(libyuv_deps, new_cr_deps): + """ + Calculate new deps entries of interest. + + Ideally, that would mean: only appearing in chromium DEPS + but transitively used in LibYUV. + + Since it's hard to compute, we restrict ourselves to a well defined subset: + deps sitting in `ANDROID_DEPS_PATH`. + Otherwise, assumes that's a Chromium-only dependency. + + Args: + libyuv_deps: dict of deps as defined in the LibYUV DEPS file. + new_cr_deps: dict of deps as defined in the chromium DEPS file. + + Caveat: Doesn't detect a new package in existing dep. + + Returns: + A tuple consisting of: + A list of paths added dependencies sitting in `ANDROID_DEPS_PATH`. + A list of paths for other added dependencies. + """ + all_added_deps = _FindNewDeps(libyuv_deps, new_cr_deps) + generated_android_deps = [ + path for path in all_added_deps if path.startswith(ANDROID_DEPS_PATH) + ] + other_deps = [ + path for path in all_added_deps if path not in generated_android_deps + ] + return generated_android_deps, other_deps + + +def FindRemovedDeps(libyuv_deps, new_cr_deps): + """ + Calculate obsolete deps entries. + + Ideally, that would mean: no more appearing in chromium DEPS + and not used in LibYUV. 
+ + Since it's hard to compute: + 1/ We restrict ourselves to a well defined subset: + deps sitting in `ANDROID_DEPS_PATH`. + 2/ We rely on existing behavior of CalculateChangeDeps. + I.e. Assumes non-CIPD dependencies are LibYUV-only, don't remove them. + + Args: + libyuv_deps: dict of deps as defined in the LibYUV DEPS file. + new_cr_deps: dict of deps as defined in the chromium DEPS file. + + Caveat: Doesn't detect a deleted package in existing dep. + + Returns: + A tuple consisting of: + A list of paths of dependencies removed from `ANDROID_DEPS_PATH`. + A list of paths of unexpected disappearing dependencies. + """ + all_removed_deps = _FindNewDeps(new_cr_deps, libyuv_deps) + generated_android_deps = sorted( + [path for path in all_removed_deps if path.startswith(ANDROID_DEPS_PATH)]) + # Webrtc-only dependencies are handled in CalculateChangedDeps. + other_deps = sorted([ + path for path in all_removed_deps + if path not in generated_android_deps and path not in LIBYUV_ONLY_DEPS + ]) + return generated_android_deps, other_deps + + +def CalculateChangedDeps(libyuv_deps, new_cr_deps): + """ + Calculate changed deps entries based on entries defined in the LibYUV DEPS + file: + - If a shared dependency with the Chromium DEPS file: roll it to the same + revision as Chromium (i.e. entry in the new_cr_deps dict) + - If it's a Chromium sub-directory, roll it to the HEAD revision (notice + this means it may be ahead of the chromium_revision, but generally these + should be close). + - If it's another DEPS entry (not shared with Chromium), roll it to HEAD + unless it's configured to be skipped. + + Returns: + A list of ChangedDep objects representing the changed deps. 
+ """ + result = [] + libyuv_entries = BuildDepsentryDict(libyuv_deps) + new_cr_entries = BuildDepsentryDict(new_cr_deps) + for path, libyuv_deps_entry in libyuv_entries.items(): + if path in DONT_AUTOROLL_THESE: + continue + cr_deps_entry = new_cr_entries.get(path) + if cr_deps_entry: + assert type(cr_deps_entry) is type(libyuv_deps_entry) + + if isinstance(cr_deps_entry, CipdDepsEntry): + result.extend( + _FindChangedCipdPackages(path, libyuv_deps_entry.packages, + cr_deps_entry.packages)) + continue + + if isinstance(cr_deps_entry, VersionEntry): + result.extend( + _FindChangedVars(path, libyuv_deps_entry.version, + cr_deps_entry.version)) + continue + + # Use the revision from Chromium's DEPS file. + new_rev = cr_deps_entry.revision + assert libyuv_deps_entry.url == cr_deps_entry.url, ( + 'LibYUV DEPS entry %s has a different URL %s than Chromium %s.' % + (path, libyuv_deps_entry.url, cr_deps_entry.url)) + else: + if isinstance(libyuv_deps_entry, DepsEntry): + # Use the HEAD of the deps repo. + stdout, _ = _RunCommand( + ['git', 'ls-remote', libyuv_deps_entry.url, 'HEAD']) + new_rev = stdout.strip().split('\t')[0] + else: + # The dependency has been removed from chromium. + # This is handled by FindRemovedDeps. + continue + + # Check if an update is necessary. 
+ if libyuv_deps_entry.revision != new_rev: + logging.debug('Roll dependency %s to %s', path, new_rev) + result.append( + ChangedDep(path, libyuv_deps_entry.url, libyuv_deps_entry.revision, + new_rev)) + return sorted(result) + + +def CalculateChangedClang(new_cr_rev): + + def GetClangRev(lines): + for line in lines: + match = CLANG_REVISION_RE.match(line) + if match: + return match.group(1) + raise RollError('Could not parse Clang revision!') + + with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'r') as f: + current_lines = f.readlines() + current_rev = GetClangRev(current_lines) + + new_clang_update_py = ReadRemoteCrFile(CLANG_UPDATE_SCRIPT_URL_PATH, + new_cr_rev).splitlines() + new_rev = GetClangRev(new_clang_update_py) + return ChangedDep(CLANG_UPDATE_SCRIPT_LOCAL_PATH, None, current_rev, new_rev) + + +def GenerateCommitMessage( + rev_update, + current_commit_pos, + new_commit_pos, + changed_deps_list, + added_deps_paths=None, + removed_deps_paths=None, + clang_change=None, +): + current_cr_rev = rev_update.current_chromium_rev[0:10] + new_cr_rev = rev_update.new_chromium_rev[0:10] + rev_interval = '%s..%s' % (current_cr_rev, new_cr_rev) + git_number_interval = '%s:%s' % (current_commit_pos, new_commit_pos) + + commit_msg = [ + 'Roll chromium_revision %s (%s)\n' % (rev_interval, git_number_interval), + 'Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval), + 'Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE % rev_interval) + ] + + def Section(adjective, deps): + noun = 'dependency' if len(deps) == 1 else 'dependencies' + commit_msg.append('%s %s' % (adjective, noun)) + + if changed_deps_list: + Section('Changed', changed_deps_list) + + for c in changed_deps_list: + if isinstance(c, ChangedCipdPackage): + commit_msg.append('* %s: %s..%s' % + (c.path, c.current_version, c.new_version)) + elif isinstance(c, ChangedVersionEntry): + commit_msg.append('* %s_vesion: %s..%s' % + (c.path, c.current_version, c.new_version)) + else: + commit_msg.append('* %s: %s/+log/%s..%s' % + 
(c.path, c.url, c.current_rev[0:10], c.new_rev[0:10])) + + if added_deps_paths: + Section('Added', added_deps_paths) + commit_msg.extend('* %s' % p for p in added_deps_paths) + + if removed_deps_paths: + Section('Removed', removed_deps_paths) + commit_msg.extend('* %s' % p for p in removed_deps_paths) + + if any([changed_deps_list, added_deps_paths, removed_deps_paths]): + change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, 'DEPS') + commit_msg.append('DEPS diff: %s\n' % change_url) + else: + commit_msg.append('No dependencies changed.') + + if clang_change and clang_change.current_rev != clang_change.new_rev: + commit_msg.append('Clang version changed %s:%s' % + (clang_change.current_rev, clang_change.new_rev)) + change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, + CLANG_UPDATE_SCRIPT_URL_PATH) + commit_msg.append('Details: %s\n' % change_url) + else: + commit_msg.append('No update to Clang.\n') + + commit_msg.append('BUG=None') + return '\n'.join(commit_msg) + + +def UpdateDepsFile(deps_filename, rev_update, changed_deps, new_cr_content): + """Update the DEPS file with the new revision.""" + + with open(deps_filename, 'rb') as deps_file: + deps_content = deps_file.read().decode('utf-8') + + # Update the chromium_revision variable. + deps_content = deps_content.replace(rev_update.current_chromium_rev, + rev_update.new_chromium_rev) + + # Add and remove dependencies. For now: only generated android deps. + # Since gclient cannot add or remove deps, we on the fact that + # these android deps are located in one place we can copy/paste. 
+ deps_re = re.compile(ANDROID_DEPS_START + '.*' + ANDROID_DEPS_END, re.DOTALL) + new_deps = deps_re.search(new_cr_content) + old_deps = deps_re.search(deps_content) + if not new_deps or not old_deps: + faulty = 'Chromium' if not new_deps else 'LibYUV' + raise RollError('Was expecting to find "%s" and "%s"\n' + 'in %s DEPS' % + (ANDROID_DEPS_START, ANDROID_DEPS_END, faulty)) + deps_content = deps_re.sub(new_deps.group(0), deps_content) + + for dep in changed_deps: + if isinstance(dep, ChangedVersionEntry): + deps_content = deps_content.replace(dep.current_version, dep.new_version) + + with open(deps_filename, 'wb') as deps_file: + deps_file.write(deps_content.encode('utf-8')) + + # Update each individual DEPS entry. + for dep in changed_deps: + # ChangedVersionEntry types are already been processed. + if isinstance(dep, ChangedVersionEntry): + continue + local_dep_dir = os.path.join(CHECKOUT_ROOT_DIR, dep.path) + if not os.path.isdir(local_dep_dir): + raise RollError( + 'Cannot find local directory %s. Either run\n' + 'gclient sync --deps=all\n' + 'or make sure the .gclient file for your solution contains all ' + 'platforms in the target_os list, i.e.\n' + 'target_os = ["android", "unix", "mac", "ios", "win"];\n' + 'Then run "gclient sync" again.' 
% local_dep_dir) + if isinstance(dep, ChangedCipdPackage): + package = dep.package.format() # Eliminate double curly brackets + update = '%s:%s@%s' % (dep.path, package, dep.new_version) + else: + update = '%s@%s' % (dep.path, dep.new_rev) + _RunCommand(['gclient', 'setdep', '--revision', update], + working_dir=CHECKOUT_SRC_DIR) + + +def _IsTreeClean(): + stdout, _ = _RunCommand(['git', 'status', '--porcelain']) + if len(stdout) == 0: + return True + + logging.error('Dirty/unversioned files:\n%s', stdout) + return False + + +def _EnsureUpdatedMainBranch(dry_run): + current_branch = _RunCommand(['git', 'rev-parse', '--abbrev-ref', + 'HEAD'])[0].splitlines()[0] + if current_branch != 'main': + logging.error('Please checkout the main branch and re-run this script.') + if not dry_run: + sys.exit(-1) + + logging.info('Updating main branch...') + _RunCommand(['git', 'pull']) + + +def _CreateRollBranch(dry_run): + logging.info('Creating roll branch: %s', ROLL_BRANCH_NAME) + if not dry_run: + _RunCommand(['git', 'checkout', '-b', ROLL_BRANCH_NAME]) + + +def _RemovePreviousRollBranch(dry_run): + active_branch, branches = _GetBranches() + if active_branch == ROLL_BRANCH_NAME: + active_branch = 'main' + if ROLL_BRANCH_NAME in branches: + logging.info('Removing previous roll branch (%s)', ROLL_BRANCH_NAME) + if not dry_run: + _RunCommand(['git', 'checkout', active_branch]) + _RunCommand(['git', 'branch', '-D', ROLL_BRANCH_NAME]) + + +def _LocalCommit(commit_msg, dry_run): + logging.info('Committing changes locally.') + if not dry_run: + _RunCommand(['git', 'add', '--update', '.']) + _RunCommand(['git', 'commit', '-m', commit_msg]) + + +def ChooseCQMode(skip_cq, cq_over, current_commit_pos, new_commit_pos): + if skip_cq: + return 0 + if (new_commit_pos - current_commit_pos) < cq_over: + return 1 + return 2 + + +def _GetCcRecipients(changed_deps_list): + """Returns a list of emails to notify based on the changed deps list. 
+ """ + cc_recipients = [] + for c in changed_deps_list: + pass + return cc_recipients + + +def _UploadCL(commit_queue_mode, add_cc=None): + """Upload the committed changes as a changelist to Gerrit. + + commit_queue_mode: + - 2: Submit to commit queue. + - 1: Run trybots but do not submit to CQ. + - 0: Skip CQ, upload only. + + add_cc: A list of email addresses to add as CC recipients. + """ + cc_recipients = [] + if add_cc: + cc_recipients.extend(add_cc) + cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks'] + if commit_queue_mode >= 2: + logging.info('Sending the CL to the CQ...') + cmd.extend(['-o', 'label=Bot-Commit+1']) + cmd.extend(['-o', 'label=Commit-Queue+2']) + cmd.extend(['--send-mail', '--cc', ','.join(cc_recipients)]) + elif commit_queue_mode >= 1: + logging.info('Starting CQ dry run...') + cmd.extend(['-o', 'label=Commit-Queue+1']) + extra_env = { + 'EDITOR': 'true', + 'SKIP_GCE_AUTH_FOR_GIT': '1', + } + stdout, stderr = _RunCommand(cmd, extra_env=extra_env) + logging.debug('Output from "git cl upload":\nstdout:\n%s\n\nstderr:\n%s', + stdout, stderr) + + +def GetRollRevisionRanges(opts, libyuv_deps): + current_cr_rev = libyuv_deps['vars']['chromium_revision'] + new_cr_rev = opts.revision + if not new_cr_rev: + stdout, _ = _RunCommand(['git', 'ls-remote', CHROMIUM_SRC_URL, 'HEAD']) + head_rev = stdout.strip().split('\t')[0] + logging.info('No revision specified. Using HEAD: %s', head_rev) + new_cr_rev = head_rev + + return ChromiumRevisionUpdate(current_cr_rev, new_cr_rev) + + +def main(): + p = argparse.ArgumentParser() + p.add_argument('--clean', + action='store_true', + default=False, + help='Removes any previous local roll branch.') + p.add_argument('-r', + '--revision', + help=('Chromium Git revision to roll to. 
Defaults to the ' + 'Chromium HEAD revision if omitted.')) + p.add_argument('--dry-run', + action='store_true', + default=False, + help=('Calculate changes and modify DEPS, but don\'t create ' + 'any local branch, commit, upload CL or send any ' + 'tryjobs.')) + p.add_argument('-i', + '--ignore-unclean-workdir', + action='store_true', + default=False, + help=('Ignore if the current branch is not main or if there ' + 'are uncommitted changes (default: %(default)s).')) + grp = p.add_mutually_exclusive_group() + grp.add_argument('--skip-cq', + action='store_true', + default=False, + help='Skip sending the CL to the CQ (default: %(default)s)') + grp.add_argument('--cq-over', + type=int, + default=1, + help=('Commit queue dry run if the revision difference ' + 'is below this number (default: %(default)s)')) + p.add_argument('-v', + '--verbose', + action='store_true', + default=False, + help='Be extra verbose in printing of log messages.') + opts = p.parse_args() + + if opts.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + + if not opts.ignore_unclean_workdir and not _IsTreeClean(): + logging.error('Please clean your local checkout first.') + return 1 + + if opts.clean: + _RemovePreviousRollBranch(opts.dry_run) + + if not opts.ignore_unclean_workdir: + _EnsureUpdatedMainBranch(opts.dry_run) + + deps_filename = os.path.join(CHECKOUT_SRC_DIR, 'DEPS') + libyuv_deps = ParseLocalDepsFile(deps_filename) + + rev_update = GetRollRevisionRanges(opts, libyuv_deps) + + current_commit_pos = ParseCommitPosition( + ReadRemoteCrCommit(rev_update.current_chromium_rev)) + new_commit_pos = ParseCommitPosition( + ReadRemoteCrCommit(rev_update.new_chromium_rev)) + + new_cr_content = ReadRemoteCrFile('DEPS', rev_update.new_chromium_rev) + new_cr_deps = ParseDepsDict(new_cr_content) + changed_deps = CalculateChangedDeps(libyuv_deps, new_cr_deps) + # Discard other deps, assumed to be chromium-only dependencies. 
+ new_generated_android_deps, _ = FindAddedDeps(libyuv_deps, new_cr_deps) + removed_generated_android_deps, other_deps = FindRemovedDeps( + libyuv_deps, new_cr_deps) + if other_deps: + raise RollError('LibYUV DEPS entries are missing from Chromium: %s.\n' + 'Remove them or add them to either ' + 'LIBYUV_ONLY_DEPS or DONT_AUTOROLL_THESE.' % other_deps) + clang_change = CalculateChangedClang(rev_update.new_chromium_rev) + commit_msg = GenerateCommitMessage( + rev_update, + current_commit_pos, + new_commit_pos, + changed_deps, + added_deps_paths=new_generated_android_deps, + removed_deps_paths=removed_generated_android_deps, + clang_change=clang_change) + logging.debug('Commit message:\n%s', commit_msg) + + _CreateRollBranch(opts.dry_run) + if not opts.dry_run: + UpdateDepsFile(deps_filename, rev_update, changed_deps, new_cr_content) + if _IsTreeClean(): + logging.info("No DEPS changes detected, skipping CL creation.") + else: + _LocalCommit(commit_msg, opts.dry_run) + commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over, + current_commit_pos, new_commit_pos) + logging.info('Uploading CL...') + if not opts.dry_run: + _UploadCL(commit_queue_mode, _GetCcRecipients(changed_deps)) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py b/tools_libyuv/autoroller/unittests/roll_deps_test.py index af86bdd5..af86bdd5 100755 --- a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py +++ b/tools_libyuv/autoroller/unittests/roll_deps_test.py diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS b/tools_libyuv/autoroller/unittests/testdata/DEPS index 4f45860c..4f45860c 100644 --- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS +++ b/tools_libyuv/autoroller/unittests/testdata/DEPS diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new index d53083ce..d53083ce 100644 --- 
a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new +++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old index dd6ddaec..dd6ddaec 100644 --- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old +++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old diff --git a/files/tools_libyuv/get_landmines.py b/tools_libyuv/get_landmines.py index 8b33483e..8b33483e 100755 --- a/files/tools_libyuv/get_landmines.py +++ b/tools_libyuv/get_landmines.py diff --git a/tools_libyuv/msan/OWNERS b/tools_libyuv/msan/OWNERS new file mode 100644 index 00000000..9b67a8f6 --- /dev/null +++ b/tools_libyuv/msan/OWNERS @@ -0,0 +1,3 @@ +mbonadei@chromium.org +fbarchard@chromium.org +pbos@chromium.org diff --git a/files/tools_libyuv/msan/blacklist.txt b/tools_libyuv/msan/blacklist.txt index 8b5e42a7..8b5e42a7 100644 --- a/files/tools_libyuv/msan/blacklist.txt +++ b/tools_libyuv/msan/blacklist.txt diff --git a/tools_libyuv/ubsan/OWNERS b/tools_libyuv/ubsan/OWNERS new file mode 100644 index 00000000..9b67a8f6 --- /dev/null +++ b/tools_libyuv/ubsan/OWNERS @@ -0,0 +1,3 @@ +mbonadei@chromium.org +fbarchard@chromium.org +pbos@chromium.org diff --git a/files/tools_libyuv/ubsan/blacklist.txt b/tools_libyuv/ubsan/blacklist.txt index 8bcb2907..8bcb2907 100644 --- a/files/tools_libyuv/ubsan/blacklist.txt +++ b/tools_libyuv/ubsan/blacklist.txt diff --git a/files/tools_libyuv/ubsan/vptr_blacklist.txt b/tools_libyuv/ubsan/vptr_blacklist.txt index 23cfca53..23cfca53 100644 --- a/files/tools_libyuv/ubsan/vptr_blacklist.txt +++ b/tools_libyuv/ubsan/vptr_blacklist.txt diff --git a/files/unit_test/basictypes_test.cc b/unit_test/basictypes_test.cc index 9aaa2dcd..9aaa2dcd 100644 --- a/files/unit_test/basictypes_test.cc +++ b/unit_test/basictypes_test.cc diff --git a/files/unit_test/color_test.cc 
b/unit_test/color_test.cc index 01267ff1..01267ff1 100644 --- a/files/unit_test/color_test.cc +++ b/unit_test/color_test.cc diff --git a/files/unit_test/compare_test.cc b/unit_test/compare_test.cc index c29562cb..c29562cb 100644 --- a/files/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc diff --git a/files/unit_test/convert_test.cc b/unit_test/convert_argb_test.cc index 1f975825..aeee8a7f 100644 --- a/files/unit_test/convert_test.cc +++ b/unit_test/convert_argb_test.cc @@ -1,5 +1,5 @@ /* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * Copyright 2023 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -31,6 +31,13 @@ #include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ #endif +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + // Some functions fail on big endian. Enable these tests on all cpus except // PowerPC, but they are not optimized so disabled by default. #if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) @@ -48,500 +55,15 @@ namespace libyuv { #define AR30ToAR30 ARGBCopy #define ABGRToABGR ARGBCopy +// subsample amount uses a divide. 
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -// Planar test - -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_u, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_v, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ - MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - MemRandomize(src_v + 
OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ - SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ - src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ - src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ - reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ - reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ - reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ - reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ - } \ - for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ - EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ - EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_u_c); \ - free_aligned_buffer_page_end(dst_v_c); \ - 
free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_u_opt); \ - free_aligned_buffer_page_end(dst_v_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - } - -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0, SRC_DEPTH) - -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8) -TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8) -TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8) -TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8) -TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8) 
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8) -TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10) -TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12) -TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) -TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) -TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) -TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) -TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12) -TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) - -// Test Android 420 to I420 -#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSizeUV = \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_uv, \ - kSizeUV*((PIXEL_STRIDE == 3) ? 
3 : 2) + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - uint8_t* src_u = src_uv + OFF_U; \ - uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \ - int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ - (fastrand() & 0xff); \ - src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ - (fastrand() & 0xff); \ - } \ - } \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_u_c, 2, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 3, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_u_opt, 102, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 103, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \ - kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i 
< benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \ - dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ - dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ - dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_u_c); \ - free_aligned_buffer_page_end(dst_v_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_u_opt); \ - free_aligned_buffer_page_end(dst_v_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - } - -#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ - SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ - SUBSAMP_Y) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \ - _Any, +, 0, PN, OFF_U, OFF_V) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \ - _Unaligned, +, 2, PN, OFF_U, OFF_V) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \ - 
-, 0, PN, OFF_U, OFF_V) \ - TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ - 0, PN, OFF_U, OFF_V) - -TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) -TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) -TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) -#undef TESTAPLANARTOP -#undef TESTAPLANARTOPI - -// wrapper to keep API the same -int I400ToNV21(const uint8_t* src_y, - int src_stride_y, - const uint8_t* /* src_u */, - int /* src_stride_u */, - const uint8_t* /* src_v */, - int /* src_stride_v */, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { - return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, - dst_stride_vu, width, height); -} - -#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* 
kHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_u, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(src_v, \ - kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_c, \ - kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_opt, \ - kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ - MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ - SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ - src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ - src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \ - src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_c), \ - kDstHalfWidth * 2, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ - reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \ 
- NEG kHeight); \ - } \ - for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ - } \ - for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \ - EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - } - -#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ - SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) - -TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) -TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10) -TESTPLANARTOBP(I210, 
uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) -TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) -TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) - -#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ - static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ - static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ - "SRC_SUBSAMP_X unsupported"); \ - static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ - "SRC_SUBSAMP_Y unsupported"); \ - static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ - "DST_SUBSAMP_X unsupported"); \ - static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ - "DST_SUBSAMP_Y unsupported"); \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ - const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ - const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ - const int kPaddedHeight = \ - (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ - const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ - const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ - align_buffer_page_end( \ - src_uv, \ - 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ - align_buffer_page_end(dst_uv_c, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ - 
align_buffer_page_end(dst_uv_opt, \ - 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ - SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ - for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \ - src_y_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \ - src_uv_p[i] = \ - (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ - } \ - memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ - memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - MaskCpuFlags(disable_cpu_flags_); \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \ - DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \ - DOY ? 
reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ - reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ - NEG kHeight); \ - } \ - if (DOY) { \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - } \ - for (int i = 0; i < kDstHalfHeight; ++i) { \ - for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ - EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ - dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - } +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) -#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \ - TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOBPI(SRC_FMT_PLANAR, 
SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) - -TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) -TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) -TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) -TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) - -#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ +#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ @@ -621,30 +143,39 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) free_aligned_buffer_page_end(src_uv); \ } -#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \ - 
TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ - SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ - DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \ - TILE_WIDTH, TILE_HEIGHT) +#if defined(ENABLE_FULL_TESTS) +#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) +#else +#define 
TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) +#endif -TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) -TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1) +TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) // Provide matrix wrappers for full range bt.709 #define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ @@ -680,8 +211,12 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) - -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) +#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, W1280, N, NEG, OFF) \ @@ -746,8 +281,6 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN) \ 
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, benchmark_width_, _Opt, +, 0) #endif @@ -792,8 +325,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) @@ -816,7 +353,9 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) #endif TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) -#else +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1) +#else // FULL_TESTS TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) @@ -832,232 +371,21 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) -TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif -#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, 
ALIGN, \ - YALIGN, W1280, N, NEG, OFF, ATTEN) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, kSizeUV + OFF); \ - align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (fastrand() & 0xff); \ - src_a[i + OFF] = (fastrand() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (fastrand() & 0xff); \ - src_v[i + OFF] = (fastrand() & 0xff); \ - } \ - memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ - memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ - dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ - src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ - dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ - ATTEN); \ - } \ - for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_u); \ - free_aligned_buffer_page_end(src_v); \ - free_aligned_buffer_page_end(src_a); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } - -#if 
defined(ENABLE_FULL_TESTS) -#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Premult, +, 0, 1) -#else -#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN) \ - TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0) -#endif - -#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, 
j, &kYuv2020Constants, k, \ - l, m) -#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, 
&kYuvV2020Constants, k, \ - l, m) -#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ - l, m) -#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ - l, m) -#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ - l, m) -#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ - l, m) -#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) -#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ - l, m) - -#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) -#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ - &kYuvI601Constants, k, l, m, kFilterBilinear) - -#if defined(ENABLE_FULL_TESTS) -TESTQPLANARTOB(I420Alpha, 2, 2, 
ARGB, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) -#else -TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) -TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 
4, 4, 1) -TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) -#endif - -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ - BPP_B, W1280, N, NEG, OFF) \ +#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ @@ -1110,15 +438,21 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) free_aligned_buffer_page_end(dst_argb32_opt); \ } -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - benchmark_width_, _Opt, +, 0) +#if defined(ENABLE_FULL_TESTS) +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Invert, -, 0) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \ + TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, _Opt, +, 0) +#endif #define JNV12ToARGB(a, b, c, d, e, f, g, h) \ NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) @@ -1139,187 +473,30 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #define 
JNV12ToRGB565(a, b, c, d, e, f, g, h) \ NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h) -TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3) -#ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2) -#endif - -TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4) -TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4) -TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3) -TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3) -TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3) +TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2) -#endif - -#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = W1280; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - 
align_buffer_page_end(dst_uv_opt, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ - kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ - dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ - kStrideUV * 2, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ - for (int j = 0; j < kStrideUV; ++j) { \ - EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_argb); \ - } - -#if defined(ENABLE_FULL_TESTS) -#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 2) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, 
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) -#else -#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) +TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2) #endif -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2) -TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1) -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2) -TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1) +TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4) +TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4) +TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3) +TESTBPTOB(NV12, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, RAW, RAW, 3) +TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2) -TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2) -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2) +TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) #endif -TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2) -TESTATOPLANAR(J400, 1, 1, J420, 2, 2) -TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) -TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) -TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) -TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2) -TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2) -TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) - -#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ - SUBSAMP_Y, W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ - const int kStrideUV = 
SUBSAMPLE(kWidth, SUBSAMP_X); \ - align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, \ - kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ - kStrideUV * 2, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ - dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \ - } \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - } \ - } \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < kStrideUV * 2; ++j) { \ - EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ - dst_uv_opt[i * kStrideUV * 2 + j]); \ - } \ - } \ - free_aligned_buffer_page_end(dst_y_c); \ - free_aligned_buffer_page_end(dst_uv_c); \ - free_aligned_buffer_page_end(dst_y_opt); \ - free_aligned_buffer_page_end(dst_uv_opt); \ - free_aligned_buffer_page_end(src_argb); \ - } - -#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ + 1, _Any, +, 0) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - 
benchmark_width_, _Unaligned, +, 2) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) -TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2) -TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2) -TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) -TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) -TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2) -TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) #define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ @@ -1440,6 +617,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) @@ -1450,7 +628,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) #endif TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) -TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4 +TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) @@ -1484,6 +662,127 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// in place test +#define TESTATOAI(FMT_A, TYPE_A, EPP_A, 
STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i + OFF] = (fastrand() & 0xff); \ + } \ + memcpy(dst_argb_c + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + } \ + memcpy(dst_argb_opt + OFF, src_argb, \ + kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ + (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define 
TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ + EPP_B, STRIDE_B, HEIGHT_B) \ + TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ + STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0) + +TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1) +#endif +TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1) +// TODO(fbarchard): Support in place for mirror. 
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1) +TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1) +#endif +TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1) +TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) +// TODO(fbarchard): Support in place for conversions that increase bpp. +// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1) +// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1) +// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1) +// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1) +TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1) +// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1) +TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1) +#endif +TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, 
uint8_t, 4, 4, 1) +// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1) +// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) +// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) +TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) +TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) + #define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \ @@ -1554,6 +853,7 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) } \ } +#if defined(ENABLE_FULL_TESTS) #define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B) \ TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ @@ -1566,6 +866,12 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) HEIGHT_B, benchmark_width_, _Opt, +, 0) \ TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B) +#else +#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) \ + TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ + HEIGHT_B) +#endif #ifdef LITTLE_ENDIAN_ONLY_TEST TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) @@ -1634,1081 +940,217 @@ TESTEND(BGRAToARGB, uint8_t, 4, 4, 1) TESTEND(ABGRToARGB, uint8_t, 4, 4, 1) TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) -#ifdef HAVE_JPEG -TEST_F(LibYUVConvertTest, ValidateJpeg) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? 
benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_page_end(orig_pixels, kSize); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - // Test special value that matches marker start. - memset(orig_pixels, 0xff, kSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - // EOI, SOI. Expect pass. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. - for (int times = 0; times < benchmark_iterations_; ++times) { - EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); - } - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVConvertTest, ValidateJpegLarge) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - const int kMultiple = 10; - const int kBufSize = kImageSize * kMultiple + kOff; - align_buffer_page_end(orig_pixels, kBufSize); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kBufSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); - - // EOI, SOI. Expect pass. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. - for (int times = 0; times < benchmark_iterations_; ++times) { - EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); - } - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVConvertTest, InvalidateJpeg) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg - ? benchmark_width_ * benchmark_height_ - : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_page_end(orig_pixels, kSize); - - // NULL pointer. Expect fail. 
- EXPECT_FALSE(ValidateJpeg(NULL, kSize)); - - // Negative size. Expect fail. - EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); - - // Too large size. Expect fail. - EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kSize); - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - // SOI but no EOI. Expect fail. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - for (int times = 0; times < benchmark_iterations_; ++times) { - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - } - - // EOI but no SOI. Expect fail. - orig_pixels[0] = 0; - orig_pixels[1] = 0; - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. - EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); - - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVConvertTest, FuzzJpeg) { - // SOI but no EOI. Expect fail. - for (int times = 0; times < benchmark_iterations_; ++times) { - const int kSize = fastrand() % 5000 + 3; - align_buffer_page_end(orig_pixels, kSize); - MemRandomize(orig_pixels, kSize); - - // Add SOI so frame will be scanned. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[2] = 0xff; - orig_pixels[kSize - 1] = 0xff; - ValidateJpeg(orig_pixels, - kSize); // Failure normally expected. - free_aligned_buffer_page_end(orig_pixels); - } -} - -// Test data created in GIMP. In export jpeg, disable -// thumbnails etc, choose a subsampling, and use low quality -// (50) to keep size small. 
Generated with xxd -i test.jpg -// test 0 is J400 -static const uint8_t kTest0Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, - 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, - 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, - 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, - 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, - 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, - 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, - 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, - 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, - 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, - 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 
0x23, 0xf9, - 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, - 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, - 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, - 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, - 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, - 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, - 0xd9}; -static const size_t kTest0JpgLen = 421; - -// test 1 is J444 -static const uint8_t kTest1Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, - 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, - 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, - 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, - 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, - 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, - 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, - 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, - 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, - 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, - 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, - 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, - 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, - 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, - 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, - 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, - 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, - 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, - 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, - 0x5d, 0x7a, 0x35, 0x02, 
0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, - 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, - 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, - 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, - 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, - 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, - 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, - 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, - 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, - 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, - 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, - 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, - 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, - 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, - 0xd4, 0xff, 0xd9}; -static const size_t kTest1JpgLen = 735; - -// test 2 is J420 -static const uint8_t kTest2Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 
0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, - 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, - 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, - 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, - 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, - 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, - 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, - 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, - 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, - 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 
0x02, 0x4b, 0xb3, 0x28, - 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, - 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, - 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, - 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, - 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, - 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, - 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, - 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, - 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, - 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, - 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, - 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, - 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, - 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, - 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, - 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, - 0xd9}; -static const size_t kTest2JpgLen = 685; - -// test 3 is J422 -static const uint8_t kTest3Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 
0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, - 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, - 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, - 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, - 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, - 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, - 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, - 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, - 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, - 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, - 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, - 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, - 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, - 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, - 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, - 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, - 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, - 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, - 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, - 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, - 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, - 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, - 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, - 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, - 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, - 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, - 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, - 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, - 0x47, 0xa7, 
0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, - 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; -static const size_t kTest3JpgLen = 704; - -// test 4 is J422 vertical - not supported -static const uint8_t kTest4Jpg[] = { - 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, - 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, - 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, - 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, - 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, - 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, - 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, - 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, - 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, - 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, - 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, - 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, - 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff, - 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, - 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, - 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, - 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 
0x01, 0x05, - 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, - 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, - 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, - 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, - 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, - 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, - 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, - 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, - 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, - 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, - 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, - 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, - 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, - 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, - 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff, - 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, - 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, - 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 
0x41, 0xff, - 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, - 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, - 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, - 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, - 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, - 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; -static const size_t kTest4JpgLen = 701; - -TEST_F(LibYUVConvertTest, TestMJPGSize) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - printf("test jpeg size %d x %d\n", width, height); -} - -TEST_F(LibYUVConvertTest, TestMJPGToI420) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_u, half_width * half_height); - align_buffer_page_end(dst_v, half_width * half_height); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, - dst_v, half_width, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); - uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_u_hash, 2501859930u); - EXPECT_EQ(dst_v_hash, 2126459123u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); -} - -TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - // Convert to NV21 - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert to I420 - align_buffer_page_end(dst2_y, width * height); - align_buffer_page_end(dst2_u, half_width * half_height); - align_buffer_page_end(dst2_v, half_width * half_height); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, - dst2_v, half_width, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert I420 to NV21 - align_buffer_page_end(dst3_y, width * height); - align_buffer_page_end(dst3_vu, half_width * half_height * 2); - - I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, - width, dst3_vu, half_width * 2, width, height); - - for (int i = 0; i < width * height; ++i) { - EXPECT_EQ(dst_y[i], dst3_y[i]); - } - for (int i = 0; i < half_width * half_height * 2; 
++i) { - EXPECT_EQ(dst_vu[i], dst3_vu[i]); - EXPECT_EQ(dst_vu[i], dst3_vu[i]); - } - - free_aligned_buffer_page_end(dst3_y); - free_aligned_buffer_page_end(dst3_vu); - - free_aligned_buffer_page_end(dst2_y); - free_aligned_buffer_page_end(dst2_u); - free_aligned_buffer_page_end(dst2_v); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - // Convert to NV12 - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert to I420 - align_buffer_page_end(dst2_y, width * height); - align_buffer_page_end(dst2_u, half_width * half_height); - align_buffer_page_end(dst2_v, half_width * half_height); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, - dst2_v, half_width, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Convert I420 to NV12 - align_buffer_page_end(dst3_y, width * height); - align_buffer_page_end(dst3_uv, half_width * half_height * 2); - - I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, - width, dst3_uv, half_width * 2, width, height); - - for (int i = 0; i < width * height; ++i) { - EXPECT_EQ(dst_y[i], dst3_y[i]); - } - for (int i = 0; i < half_width * half_height * 2; ++i) { - EXPECT_EQ(dst_uv[i], dst3_uv[i]); - EXPECT_EQ(dst_uv[i], 
dst3_uv[i]); - } - - free_aligned_buffer_page_end(dst3_y); - free_aligned_buffer_page_end(dst3_uv); - - free_aligned_buffer_page_end(dst2_y); - free_aligned_buffer_page_end(dst2_u); - free_aligned_buffer_page_end(dst2_v); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 1069662856u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 1069662856u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -// TODO(fbarchard): Improve test to compare against I422, not checksum -TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 493520167u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 493520167u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 330644005u); - EXPECT_EQ(dst_uv_hash, 135214341u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 330644005u); - EXPECT_EQ(dst_vu_hash, 135214341u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 506143297u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); -} - -TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int half_width = (width + 1) / 2; - int half_height = (height + 1) / 2; - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_uv, half_width * half_height * 2); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, - half_width * 2, width, height, width, height); - } - // Expect sucesss - EXPECT_EQ(0, ret); - - // Test result matches known hash value. Hashes are for VU so flip the plane. 
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); - align_buffer_page_end(dst_vu, half_width * half_height * 2); - SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, - half_height); - uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 506143297u); - - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_uv); - free_aligned_buffer_page_end(dst_vu); -} - -TEST_F(LibYUVConvertTest, TestMJPGToARGB) { - int width = 0; - int height = 0; - int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - EXPECT_EQ(0, ret); - - int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * - benchmark_height_ / (width * height); - - align_buffer_page_end(dst_argb, width * height * 4); - for (int times = 0; times < benchmark_iterations; ++times) { - ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, - height, width, height); +#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, OFF, ATTEN) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_u, kSizeUV + OFF); \ + align_buffer_page_end(src_v, kSizeUV + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y[i + OFF] = (fastrand() & 0xff); \ + src_a[i + OFF] = (fastrand() & 0xff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + src_u[i + OFF] = (fastrand() & 
0xff); \ + src_v[i + OFF] = (fastrand() & 0xff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \ + src_v + OFF, kStrideUV, src_a + OFF, kWidth, \ + dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \ + ATTEN); \ + } \ + for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(src_a); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ } - // Expect sucesss - EXPECT_EQ(0, ret); - // Test result matches known hash value. 
- uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); -#ifdef LIBYUV_UNLIMITED_DATA - EXPECT_EQ(dst_argb_hash, 3900633302u); +#if defined(ENABLE_FULL_TESTS) +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Invert, -, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Premult, +, 0, 1) #else - EXPECT_EQ(dst_argb_hash, 2355976473u); +#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN) \ + TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, 0) #endif - free_aligned_buffer_page_end(dst_argb); -} - -static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { - MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - - int width = mjpeg_decoder.GetWidth(); - int height = mjpeg_decoder.GetHeight(); - - // YUV420 - if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 2 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - printf("JPeg is J420, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - // YUV422 - } else if (mjpeg_decoder.GetColorSpace() == 
MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - printf("JPeg is J422, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - // YUV444 - } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - printf("JPeg is J444, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - // YUV400 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceGrayscale && - mjpeg_decoder.GetNumComponents() == 1 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1) { - printf("JPeg is J400, %dx%d %d bytes\n", width, height, - static_cast<int>(sample_size)); - } else { - // Unknown colorspace. - printf("JPeg is Unknown colorspace.\n"); - } - mjpeg_decoder.UnloadFrame(); - return ret; -} - -TEST_F(LibYUVConvertTest, TestMJPGInfo) { - EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); - EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, - kTest4JpgLen)); // Valid but unsupported. 
-} -#endif // HAVE_JPEG - -TEST_F(LibYUVConvertTest, NV12Crop) { - const int SUBSAMP_X = 2; - const int SUBSAMP_Y = 2; - const int kWidth = benchmark_width_; - const int kHeight = benchmark_height_; - const int crop_y = - ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; - const int kDestWidth = benchmark_width_; - const int kDestHeight = benchmark_height_ - crop_y * 2; - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); - const int sample_size = - kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; - align_buffer_page_end(src_y, sample_size); - uint8_t* src_uv = src_y + kWidth * kHeight; - - align_buffer_page_end(dst_y, kDestWidth * kDestHeight); - align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight); - align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - for (int i = 0; i < kHeight * kWidth; ++i) { - src_y[i] = (fastrand() & 0xff); - } - for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) { - src_uv[i] = (fastrand() & 0xff); - } - memset(dst_y, 1, kDestWidth * kDestHeight); - memset(dst_u, 2, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_v, 3, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_y_2, 1, kDestWidth * kDestHeight); - memset(dst_u_2, 2, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_v_2, 3, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2, - SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2, - SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, 
crop_y, kWidth, kHeight, - kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12); - - NV12ToI420(src_y + crop_y * kWidth, kWidth, - src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y, - kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, - SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight); - - for (int i = 0; i < kDestHeight; ++i) { - for (int j = 0; j < kDestWidth; ++j) { - EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], - dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], - dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); - free_aligned_buffer_page_end(dst_y_2); - free_aligned_buffer_page_end(dst_u_2); - free_aligned_buffer_page_end(dst_v_2); - free_aligned_buffer_page_end(src_y); -} - -TEST_F(LibYUVConvertTest, I420CropOddY) { - const int SUBSAMP_X = 2; - const int SUBSAMP_Y = 2; - const int kWidth = benchmark_width_; - const int kHeight = benchmark_height_; - const int crop_y = benchmark_height_ > 1 ? 
1 : 0; - const int kDestWidth = benchmark_width_; - const int kDestHeight = benchmark_height_ - crop_y * 2; - const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); - const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); - const int sample_size = kWidth * kHeight + - kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + - kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); - align_buffer_page_end(src_y, sample_size); - uint8_t* src_u = src_y + kWidth * kHeight; - uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); - - align_buffer_page_end(dst_y, kDestWidth * kDestHeight); - align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * - SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - for (int i = 0; i < kHeight * kWidth; ++i) { - src_y[i] = (fastrand() & 0xff); - } - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { - src_u[i] = (fastrand() & 0xff); - } - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { - src_v[i] = (fastrand() & 0xff); - } - memset(dst_y, 1, kDestWidth * kDestHeight); - memset(dst_u, 2, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - memset(dst_v, 3, - SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); - - MaskCpuFlags(benchmark_cpu_info_); - for (int i = 0; i < benchmark_iterations_; ++i) { - ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, - SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, - SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, - kDestWidth, kDestHeight, libyuv::kRotate0, - libyuv::FOURCC_I420); - } +#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F420AlphaToARGB(a, b, c, d, e, f, g, h, 
i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, 
l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ + l, m) +#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ + l, m) +#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \ + l, m) +#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ 
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) +#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ + l, m) - for (int i = 0; i < kDestHeight; ++i) { - for (int j = 0; j < kDestWidth; ++j) { - EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], - dst_y[i * kDestWidth + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], - dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } - for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { - for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], - dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); - } - } +#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); - free_aligned_buffer_page_end(src_y); -} +#if defined(ENABLE_FULL_TESTS) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1) 
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#else +TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) +#endif TEST_F(LibYUVConvertTest, TestYToARGB) { uint8_t y[32]; @@ -2846,6 +1288,7 @@ TEST_F(LibYUVConvertTest, TestDither) { free_aligned_buffer_page_end(dst_argb32_opt); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, FMT_C, BPP_C) \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ @@ -2856,71 +1299,17 @@ 
TEST_F(LibYUVConvertTest, TestDither) { YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, FMT_C, BPP_C) \ + TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) +#endif #ifdef LITTLE_ENDIAN_ONLY_TEST TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) #endif -#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ - TEST_F(LibYUVConvertTest, NAME) { \ - const int kWidth = benchmark_width_; \ - const int kHeight = benchmark_height_; \ - \ - align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ - align_buffer_page_end(orig_y, kWidth* kHeight); \ - align_buffer_page_end(orig_u, \ - SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - align_buffer_page_end(orig_v, \ - SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - \ - align_buffer_page_end(dst_y_orig, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_orig, \ - 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - \ - align_buffer_page_end(dst_y, kWidth* kHeight); \ - align_buffer_page_end(dst_uv, \ - 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ - \ - MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ - \ - /* Convert UYVY to NV12 in 2 steps for reference */ \ - libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \ - orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ - SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ - libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ - SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \ - 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ - \ - /* Convert to NV12 */ \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \ - dst_uv, 2 * SUBSAMPLE(kWidth, 
2), kWidth, kHeight); \ - } \ - \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - EXPECT_EQ(orig_y[i], dst_y[i]); \ - } \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ - } \ - for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \ - ++i) { \ - EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ - } \ - \ - free_aligned_buffer_page_end(orig_uyvy); \ - free_aligned_buffer_page_end(orig_y); \ - free_aligned_buffer_page_end(orig_u); \ - free_aligned_buffer_page_end(orig_v); \ - free_aligned_buffer_page_end(dst_y_orig); \ - free_aligned_buffer_page_end(dst_uv_orig); \ - free_aligned_buffer_page_end(dst_y); \ - free_aligned_buffer_page_end(dst_uv); \ - } - -TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) -TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) - // Transitive test. A to B to C is same as A to C. // Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere. #define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \ @@ -3223,6 +1612,7 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) free_aligned_buffer_page_end(dst_argb_bc); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \ benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \ @@ -3232,6 +1622,11 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) _Invert, -, 0, FMT_C, BPP_C) \ TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ _Opt, +, 0, FMT_C, BPP_C) +#else +#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \ + TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \ + _Opt, +, 0, FMT_C, BPP_C) +#endif // Caveat: Destination needs to be 4 bytes #ifdef LITTLE_ENDIAN_ONLY_TEST @@ -3348,11 +1743,15 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { } #endif // HAS_ABGRTOAR30ROW_AVX2 +#if !defined(LEAN_TESTS) + // Provide matrix 
wrappers for 12 bit YUV #define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \ I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) #define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \ I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) +#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \ + I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) #define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \ I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) @@ -3440,6 +1839,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { free_aligned_buffer_page_end(dst_argb_opt); \ } +#if defined(ENABLE_FULL_TESTS) #define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ BPP_B, ALIGN, YALIGN) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ @@ -3450,6 +1850,12 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) +#else +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ + BPP_B, ALIGN, YALIGN) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \ + ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0) +#endif // These conversions are only optimized for x86 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) @@ -3495,6 +1901,7 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1) TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1) TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1) TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) #endif // LITTLE_ENDIAN_ONLY_TEST @@ -3733,8 +2140,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 
1, 10) #endif // DISABLE_SLOW_TESTS -#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ +#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ @@ -3777,16 +2184,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) free_aligned_buffer_page_end(dst_argb_opt); \ } -#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - ALIGN, YALIGN, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ - TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#if defined(ENABLE_FULL_TESTS) +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#else +#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, S_DEPTH) \ + TESTBP16TOBI(FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \ + benchmark_width_, _Opt, +, 0, 0, S_DEPTH) +#endif #define P010ToARGB(a, b, c, d, e, f, g, h) \ P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) @@ -3829,23 +2243,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) kFilterBilinear) #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) -TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) #ifdef LITTLE_ENDIAN_ONLY_TEST -TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) -TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) -TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) -TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) +TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) +TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) +TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) +TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) +TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) +TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) #endif // 
LITTLE_ENDIAN_ONLY_TEST #endif // DISABLE_SLOW_TESTS @@ -4281,61 +2695,6 @@ TEST_F(LibYUVConvertTest, Test565) { uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); EXPECT_EQ(610919429u, checksum); } - -// Test RGB24 to J420 is exact -#if defined(LIBYUV_BIT_EXACT) -TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { - const int kSize = 256; - align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 - align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2); - int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / - (kSize * 2) * benchmark_iterations_; - - for (int i = 0; i < kSize * 3 * 2; ++i) { - orig_rgb24[i] = i; - } - - for (int i = 0; i < iterations256; ++i) { - RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane - dest_j420 + kSize * 2, kSize / 2, // U plane - dest_j420 + kSize * 5 / 2, kSize / 2, // V plane - kSize, 2); - } - - uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381); - EXPECT_EQ(2755440272u, checksum); - - free_aligned_buffer_page_end(orig_rgb24); - free_aligned_buffer_page_end(dest_j420); -} -#endif - -// Test RGB24 to I420 is exact -#if defined(LIBYUV_BIT_EXACT) -TEST_F(LibYUVConvertTest, TestRGB24ToI420) { - const int kSize = 256; - align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 - align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2); - int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / - (kSize * 2) * benchmark_iterations_; - - for (int i = 0; i < kSize * 3 * 2; ++i) { - orig_rgb24[i] = i; - } - - for (int i = 0; i < iterations256; ++i) { - RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane - dest_i420 + kSize * 2, kSize / 2, // U plane - dest_i420 + kSize * 5 / 2, kSize / 2, // V plane - kSize, 2); - } - - uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381); - EXPECT_EQ(1526656597u, checksum); - - free_aligned_buffer_page_end(orig_rgb24); - free_aligned_buffer_page_end(dest_i420); -} -#endif 
+#endif // !defined(LEAN_TESTS) } // namespace libyuv diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc new file mode 100644 index 00000000..f55bace3 --- /dev/null +++ b/unit_test/convert_test.cc @@ -0,0 +1,2110 @@ +/* + * Copyright 2011 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <stdlib.h> +#include <time.h> + +#include "libyuv/basic_types.h" +#include "libyuv/compare.h" +#include "libyuv/convert.h" +#include "libyuv/convert_argb.h" +#include "libyuv/convert_from.h" +#include "libyuv/convert_from_argb.h" +#include "libyuv/cpu_id.h" +#ifdef HAVE_JPEG +#include "libyuv/mjpeg_decoder.h" +#endif +#include "../unit_test/unit_test.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/video_common.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */ +#endif + +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + +// Some functions fail on big endian. Enable these tests on all cpus except +// PowerPC, but they are not optimized so disabled by default. +#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__) +#define LITTLE_ENDIAN_ONLY_TEST 1 +#endif +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. 
+#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +// Alias to copy pixels as is +#define AR30ToAR30 ARGBCopy +#define ABGRToABGR ARGBCopy + +// subsample amount uses a divide. +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) + +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) + +// Planar test + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ + SRC_DEPTH) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* 
kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ + SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ + src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ + src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < 
kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#else +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#endif + +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8) 
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8) +TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10) +TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) +TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I212, 
uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12) +TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) + +// Test Android 420 to I420 +#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSizeUV = \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + uint8_t* src_u = src_uv + OFF_U; \ + uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? 
kSizeUV : OFF_V); \ + int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_u_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_c, 3, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_u_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_v_opt, 103, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \ + kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ + src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \ + dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ + dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { 
\ + EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ + EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_u_c); \ + free_aligned_buffer_page_end(dst_v_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_u_opt); \ + free_aligned_buffer_page_end(dst_v_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \ + _Any, +, 0, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \ + _Unaligned, +, 2, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \ + -, 0, PN, OFF_U, OFF_V) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V) +#else +#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \ + SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y) \ + TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \ + 0, PN, OFF_U, OFF_V) +#endif + +TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2) 
+TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2) +TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) +#undef TESTAPLANARTOP +#undef TESTAPLANARTOPI + +// wrapper to keep API the same +int I400ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* /* src_u */, + int /* src_stride_u */, + const uint8_t* /* src_v */, + int /* src_stride_v */, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, width, height); +} + +#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ + SRC_DEPTH) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + 
align_buffer_page_end(dst_uv_c, \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \ + SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \ + src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ + src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \ + src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_c), \ + kDstHalfWidth * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ + } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \ + EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ + } \ + 
free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ + SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#else +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) +#endif + +TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I400, 
uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8) +TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10) +TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10) +TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12) +TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) + +#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \ + TILE_WIDTH, TILE_HEIGHT) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ + static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ + "SRC_SUBSAMP_X unsupported"); \ + static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ + "SRC_SUBSAMP_Y unsupported"); \ + static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ + "DST_SUBSAMP_X unsupported"); \ + static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ + "DST_SUBSAMP_Y unsupported"); \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \ + const int kPaddedHeight = \ + (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ + const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ + align_buffer_page_end( \ + src_uv, \ + 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_c, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + 
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_uv_opt, \ + 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ + SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ + for (int i = 0; \ + i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_y_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \ + SRC_BPC / (int)sizeof(SRC_T); \ + ++i) { \ + src_uv_p[i] = \ + (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ + } \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ + DOY ? 
reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \ + reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ + NEG kHeight); \ + } \ + if (DOY) { \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + } \ + for (int i = 0; i < kDstHalfHeight; ++i) { \ + for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ + EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ + dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, 
DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) +#else +#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ + DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \ + TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \ + TILE_HEIGHT) +#endif + +TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1) +TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1) +TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1) +TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1) +TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32) +TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32) + +#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + 
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, 
_Invert, -, 0) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1) +TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1) +TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2) +TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1) +#ifdef LITTLE_ENDIAN_ONLY_TEST +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2) +TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2) +TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2) +#endif +TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) +TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) +TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2) +TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) + +#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \ + SUBSAMP_Y, W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_a_c, kWidth* kHeight); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_a_opt, kWidth* kHeight); \ + 
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_c, 1, kWidth* kHeight); \ + memset(dst_y_c, 2, kWidth* kHeight); \ + memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_a_opt, 101, kWidth* kHeight); \ + memset(dst_y_opt, 102, kWidth* kHeight); \ + memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \ + dst_a_c, kWidth, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \ + kStrideUV * 2, dst_a_opt, kWidth, kWidth, \ + NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ + for (int j = 0; j < kStrideUV; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_a_c); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_a_opt); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, 
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) + +#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, \ + kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \ + kStrideUV * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + 
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \ + dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \ + } \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + } \ + } \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ + dst_uv_opt[i * kStrideUV * 2 + j]); \ + } \ + } \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_argb); \ + } + +#if defined(ENABLE_FULL_TESTS) +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ + 1, _Any, +, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 2) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#else +#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) +#endif + +TESTATOBP(ARGB, 1, 4, NV12, 2, 2) +TESTATOBP(ARGB, 1, 4, NV21, 2, 2) +TESTATOBP(ABGR, 1, 4, NV12, 2, 2) +TESTATOBP(ABGR, 1, 4, NV21, 2, 2) +TESTATOBP(RAW, 1, 3, JNV21, 2, 2) +TESTATOBP(YUY2, 2, 4, NV12, 2, 2) +TESTATOBP(UYVY, 2, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV12, 2, 2) +TESTATOBP(AYUV, 1, 4, NV21, 2, 2) + +#if !defined(LEAN_TESTS) + +#ifdef HAVE_JPEG +TEST_F(LibYUVConvertTest, ValidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? 
benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // Test special value that matches marker start. + memset(orig_pixels, 0xff, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, ValidateJpegLarge) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + const int kMultiple = 10; + const int kBufSize = kImageSize * kMultiple + kOff; + align_buffer_page_end(orig_pixels, kBufSize); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kBufSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); + + // EOI, SOI. Expect pass. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); + } + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, InvalidateJpeg) { + const int kOff = 10; + const int kMinJpeg = 64; + const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg + ? benchmark_width_ * benchmark_height_ + : kMinJpeg; + const int kSize = kImageSize + kOff; + align_buffer_page_end(orig_pixels, kSize); + + // NULL pointer. Expect fail. 
+ EXPECT_FALSE(ValidateJpeg(NULL, kSize)); + + // Negative size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); + + // Too large size. Expect fail. + EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); + + // No SOI or EOI. Expect fail. + memset(orig_pixels, 0, kSize); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + // SOI but no EOI. Expect fail. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + for (int times = 0; times < benchmark_iterations_; ++times) { + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + } + + // EOI but no SOI. Expect fail. + orig_pixels[0] = 0; + orig_pixels[1] = 0; + orig_pixels[kSize - kOff + 0] = 0xff; + orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); + + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVConvertTest, FuzzJpeg) { + // SOI but no EOI. Expect fail. + for (int times = 0; times < benchmark_iterations_; ++times) { + const int kSize = fastrand() % 5000 + 3; + align_buffer_page_end(orig_pixels, kSize); + MemRandomize(orig_pixels, kSize); + + // Add SOI so frame will be scanned. + orig_pixels[0] = 0xff; + orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; + orig_pixels[kSize - 1] = 0xff; + ValidateJpeg(orig_pixels, + kSize); // Failure normally expected. + free_aligned_buffer_page_end(orig_pixels); + } +} + +// Test data created in GIMP. In export jpeg, disable +// thumbnails etc, choose a subsampling, and use low quality +// (50) to keep size small. 
Generated with xxd -i test.jpg +// test 0 is J400 +static const uint8_t kTest0Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10, + 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 
0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest0JpgLen = 421; + +// test 1 is J444 +static const uint8_t kTest1Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda, + 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01, + 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb, + 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, + 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, + 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, + 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31, + 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, + 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72, + 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, + 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, + 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, + 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, + 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, + 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, + 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, + 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, + 0x5d, 0x7a, 0x35, 0x02, 
0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, + 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff, + 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26, + 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, + 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5, + 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00, + 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, + 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, + 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, + 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, + 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, + 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, + 0xd4, 0xff, 0xd9}; +static const size_t kTest1JpgLen = 735; + +// test 2 is J420 +static const uint8_t kTest2Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 
0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00, + 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10, + 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02, + 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62, + 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, + 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, + 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 
0x02, 0x4b, 0xb3, 0x28, + 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, + 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, + 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, + 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, + 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, + 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f, + 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, + 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e, + 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, + 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10, + 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, + 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, + 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, + 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, + 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, + 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, + 0xd9}; +static const size_t kTest2JpgLen = 685; + +// test 3 is J422 +static const uint8_t kTest3Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 
0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4, + 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, + 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84, + 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, + 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, + 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, + 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, + 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, + 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, + 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, + 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, + 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff, + 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, + 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53, + 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca, + 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, + 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, + 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, + 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, + 0x47, 0xa7, 
0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, + 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest3JpgLen = 704; + +// test 4 is J422 vertical - not supported +static const uint8_t kTest4Jpg[] = { + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43, + 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12, + 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23, + 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40, + 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51, + 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64, + 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12, + 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03, + 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00, + 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff, + 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff, + 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, + 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4, + 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, + 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 
0x01, 0x05, + 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, + 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, + 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00, + 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff, + 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02, + 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9, + 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01, + 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0, + 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e, + 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde, + 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a, + 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19, + 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff, + 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca, + 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01, + 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 
0x41, 0xff, + 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, + 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, + 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, + 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, + 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, + 0x0b, 0xb7, 0xd4, 0xff, 0xd9}; +static const size_t kTest4JpgLen = 701; + +TEST_F(LibYUVConvertTest, TestMJPGSize) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + printf("test jpeg size %d x %d\n", width, height); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_u, half_width * half_height); + align_buffer_page_end(dst_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width, + dst_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); + uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_u_hash, 2501859930u); + EXPECT_EQ(dst_v_hash, 2126459123u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + // Convert to NV21 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert I420 to NV21 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_vu, half_width * half_height * 2); + + I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_vu, half_width * 2, width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], 
dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_vu); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + // Convert to NV12 + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert to I420 + align_buffer_page_end(dst2_y, width * height); + align_buffer_page_end(dst2_u, half_width * half_height); + align_buffer_page_end(dst2_v, half_width * half_height); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width, + dst2_v, half_width, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Convert I420 to NV12 + align_buffer_page_end(dst3_y, width * height); + align_buffer_page_end(dst3_uv, half_width * half_height * 2); + + I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y, + width, dst3_uv, half_width * 2, width, height); + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_y[i], 
dst3_y[i]); + } + for (int i = 0; i < half_width * half_height * 2; ++i) { + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + } + + free_aligned_buffer_page_end(dst3_y); + free_aligned_buffer_page_end(dst3_uv); + + free_aligned_buffer_page_end(dst2_y); + free_aligned_buffer_page_end(dst2_u); + free_aligned_buffer_page_end(dst2_v); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 1069662856u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +// TODO(fbarchard): Improve test to compare against I422, not checksum +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 493520167u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 493520167u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_uv_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_vu_hash, 135214341u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); +} + +TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int half_width = (width + 1) / 2; + int half_height = (height + 1) / 2; + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_y, width * height); + align_buffer_page_end(dst_uv, half_width * half_height * 2); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv, + half_width * 2, width, height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. Hashes are for VU so flip the plane. 
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); + align_buffer_page_end(dst_vu, half_width * half_height * 2); + SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, + half_height); + uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 506143297u); + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_uv); + free_aligned_buffer_page_end(dst_vu); +} + +TEST_F(LibYUVConvertTest, TestMJPGToARGB) { + int width = 0; + int height = 0; + int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); + EXPECT_EQ(0, ret); + + int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * + benchmark_height_ / (width * height); + if (benchmark_iterations < 1) { + benchmark_iterations = 1; + } + + align_buffer_page_end(dst_argb, width * height * 4); + for (int times = 0; times < benchmark_iterations; ++times) { + ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width, + height, width, height); + } + // Expect sucesss + EXPECT_EQ(0, ret); + + // Test result matches known hash value. 
+ uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); +#ifdef LIBYUV_UNLIMITED_DATA + EXPECT_EQ(dst_argb_hash, 3900633302u); +#else + EXPECT_EQ(dst_argb_hash, 2355976473u); +#endif + + free_aligned_buffer_page_end(dst_argb); +} + +static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { + MJpegDecoder mjpeg_decoder; + LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); + + int width = mjpeg_decoder.GetWidth(); + int height = mjpeg_decoder.GetHeight(); + + // YUV420 + if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 2 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J420, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV422 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J422, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // YUV444 + } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + printf("JPeg is J444, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + // 
YUV400 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceGrayscale && + mjpeg_decoder.GetNumComponents() == 1 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1) { + printf("JPeg is J400, %dx%d %d bytes\n", width, height, + static_cast<int>(sample_size)); + } else { + // Unknown colorspace. + printf("JPeg is Unknown colorspace.\n"); + } + mjpeg_decoder.UnloadFrame(); + return ret; +} + +TEST_F(LibYUVConvertTest, TestMJPGInfo) { + EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, + kTest4JpgLen)); // Valid but unsupported. +} +#endif // HAVE_JPEG + +TEST_F(LibYUVConvertTest, NV12Crop) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = + ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = + kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; + align_buffer_page_end(src_y, sample_size); + uint8_t* src_uv = src_y + kWidth * kHeight; + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < 
kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) { + src_uv[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_y_2, 1, kDestWidth * kDestHeight); + memset(dst_u_2, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v_2, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12); + + NV12ToI420(src_y + crop_y * kWidth, kWidth, + src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y, + kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight); + + for (int i = 0; i < kDestHeight; ++i) { + for (int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(dst_y_2); + free_aligned_buffer_page_end(dst_u_2); + 
free_aligned_buffer_page_end(dst_v_2); + free_aligned_buffer_page_end(src_y); +} + +TEST_F(LibYUVConvertTest, I420CropOddY) { + const int SUBSAMP_X = 2; + const int SUBSAMP_Y = 2; + const int kWidth = benchmark_width_; + const int kHeight = benchmark_height_; + const int crop_y = benchmark_height_ > 1 ? 1 : 0; + const int kDestWidth = benchmark_width_; + const int kDestHeight = benchmark_height_ - crop_y * 2; + const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X); + const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X); + const int sample_size = kWidth * kHeight + + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) + + kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y); + align_buffer_page_end(src_y, sample_size); + uint8_t* src_u = src_y + kWidth * kHeight; + uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y); + + align_buffer_page_end(dst_y, kDestWidth * kDestHeight); + align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) * + SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + for (int i = 0; i < kHeight * kWidth; ++i) { + src_y[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) { + src_u[i] = (fastrand() & 0xff); + } + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) { + src_v[i] = (fastrand() & 0xff); + } + memset(dst_y, 1, kDestWidth * kDestHeight); + memset(dst_u, 2, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + memset(dst_v, 3, + SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y)); + + MaskCpuFlags(benchmark_cpu_info_); + for (int i = 0; i < benchmark_iterations_; ++i) { + ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u, + SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, + SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight, + kDestWidth, kDestHeight, libyuv::kRotate0, + libyuv::FOURCC_I420); + } + + for (int i = 0; i < kDestHeight; ++i) { + for 
(int j = 0; j < kDestWidth; ++j) { + EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], + dst_y[i * kDestWidth + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], + dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { + for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { + EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], + dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); + } + } + + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(src_y); +} + +#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ + TEST_F(LibYUVConvertTest, NAME) { \ + const int kWidth = benchmark_width_; \ + const int kHeight = benchmark_height_; \ + \ + align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + align_buffer_page_end(orig_y, kWidth* kHeight); \ + align_buffer_page_end(orig_u, \ + SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + align_buffer_page_end(orig_v, \ + SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y_orig, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_orig, \ + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + align_buffer_page_end(dst_y, kWidth* kHeight); \ + align_buffer_page_end(dst_uv, \ + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ + \ + MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ + \ + /* Convert UYVY to NV12 in 2 steps for reference */ \ + libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \ + orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ + SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \ + SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \ + 2 * SUBSAMPLE(kWidth, 2), kWidth, 
kHeight); \ + \ + /* Convert to NV12 */ \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \ + dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \ + } \ + \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(orig_y[i], dst_y[i]); \ + } \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ + } \ + for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \ + ++i) { \ + EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ + } \ + \ + free_aligned_buffer_page_end(orig_uyvy); \ + free_aligned_buffer_page_end(orig_y); \ + free_aligned_buffer_page_end(orig_u); \ + free_aligned_buffer_page_end(orig_v); \ + free_aligned_buffer_page_end(dst_y_orig); \ + free_aligned_buffer_page_end(dst_uv_orig); \ + free_aligned_buffer_page_end(dst_y); \ + free_aligned_buffer_page_end(dst_uv); \ + } + +TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12) +TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) + +TEST_F(LibYUVConvertTest, MM21ToYUY2) { + const int kWidth = (benchmark_width_ + 15) & (~15); + const int kHeight = (benchmark_height_ + 31) & (~31); + + align_buffer_page_end(orig_y, kWidth * kHeight); + align_buffer_page_end(orig_uv, + 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(tmp_y, kWidth * kHeight); + align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight); + + MemRandomize(orig_y, kWidth * kHeight); + MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); + + /* Convert MM21 to YUY2 in 2 steps for reference */ + libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y, + kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), kWidth, 
kHeight); + libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v, + SUBSAMPLE(kWidth, 2), golden_yuyv, + 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + + /* Convert to NV12 */ + for (int i = 0; i < benchmark_iterations_; ++i) { + libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), + dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); + } + + for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) { + EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tmp_y); + free_aligned_buffer_page_end(tmp_u); + free_aligned_buffer_page_end(tmp_v); + free_aligned_buffer_page_end(dst_yuyv); + free_aligned_buffer_page_end(golden_yuyv); +} + +// Test RGB24 to J420 is exact +#if defined(LIBYUV_BIT_EXACT) +TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 + align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2); + int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / + (kSize * 2) * benchmark_iterations_; + + for (int i = 0; i < kSize * 3 * 2; ++i) { + orig_rgb24[i] = i; + } + + for (int i = 0; i < iterations256; ++i) { + RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane + dest_j420 + kSize * 2, kSize / 2, // U plane + dest_j420 + kSize * 5 / 2, kSize / 2, // V plane + kSize, 2); + } + + uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381); + EXPECT_EQ(2755440272u, checksum); + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(dest_j420); +} +#endif + +// Test RGB24 to I420 is exact +#if defined(LIBYUV_BIT_EXACT) +TEST_F(LibYUVConvertTest, TestRGB24ToI420) { + const int kSize = 256; + align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 + align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2); + int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 
- 1)) / + (kSize * 2) * benchmark_iterations_; + + for (int i = 0; i < kSize * 3 * 2; ++i) { + orig_rgb24[i] = i; + } + + for (int i = 0; i < iterations256; ++i) { + RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane + dest_i420 + kSize * 2, kSize / 2, // U plane + dest_i420 + kSize * 5 / 2, kSize / 2, // V plane + kSize, 2); + } + + uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381); + EXPECT_EQ(1526656597u, checksum); + + free_aligned_buffer_page_end(orig_rgb24); + free_aligned_buffer_page_end(dest_i420); +} +#endif + +#endif // !defined(LEAN_TESTS) + +} // namespace libyuv diff --git a/files/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 080778f5..437b6632 100644 --- a/files/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -20,13 +20,23 @@ namespace libyuv { TEST_F(LibYUVBaseTest, TestCpuHas) { int cpu_flags = TestCpuFlag(-1); - printf("Cpu Flags %d\n", cpu_flags); + printf("Cpu Flags 0x%x\n", cpu_flags); #if defined(__arm__) || defined(__aarch64__) int has_arm = TestCpuFlag(kCpuHasARM); - printf("Has ARM %d\n", has_arm); + printf("Has ARM 0x%x\n", has_arm); int has_neon = TestCpuFlag(kCpuHasNEON); - printf("Has NEON %d\n", has_neon); + printf("Has NEON 0x%x\n", has_neon); #endif +#if defined(__riscv) && defined(__linux__) + int has_riscv = TestCpuFlag(kCpuHasRISCV); + printf("Has RISCV 0x%x\n", has_riscv); + int has_rvv = TestCpuFlag(kCpuHasRVV); + printf("Has RVV 0x%x\n", has_rvv); + int has_rvvzvfh = TestCpuFlag(kCpuHasRVVZVFH); + printf("Has RVVZVFH 0x%x\n", has_rvvzvfh); +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \ + defined(_M_X64) int has_x86 = TestCpuFlag(kCpuHasX86); int has_sse2 = TestCpuFlag(kCpuHasSSE2); int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); @@ -37,47 +47,48 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_f16c = TestCpuFlag(kCpuHasF16C); - int has_gfni = TestCpuFlag(kCpuHasGFNI); int has_avx512bw 
= TestCpuFlag(kCpuHasAVX512BW); int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI); int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); - printf("Has X86 %d\n", has_x86); - printf("Has SSE2 %d\n", has_sse2); - printf("Has SSSE3 %d\n", has_ssse3); - printf("Has SSE41 %d\n", has_sse41); - printf("Has SSE42 %d\n", has_sse42); - printf("Has AVX %d\n", has_avx); - printf("Has AVX2 %d\n", has_avx2); - printf("Has ERMS %d\n", has_erms); - printf("Has FMA3 %d\n", has_fma3); - printf("Has F16C %d\n", has_f16c); - printf("Has GFNI %d\n", has_gfni); - printf("Has AVX512BW %d\n", has_avx512bw); - printf("Has AVX512VL %d\n", has_avx512vl); - printf("Has AVX512VNNI %d\n", has_avx512vnni); - printf("Has AVX512VBMI %d\n", has_avx512vbmi); - printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2); - printf("Has AVX512VBITALG %d\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq); - + int has_avx10 = TestCpuFlag(kCpuHasAVX10); + int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); + int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); + printf("Has X86 0x%x\n", has_x86); + printf("Has SSE2 0x%x\n", has_sse2); + printf("Has SSSE3 0x%x\n", has_ssse3); + printf("Has SSE41 0x%x\n", has_sse41); + printf("Has SSE42 0x%x\n", has_sse42); + printf("Has AVX 0x%x\n", has_avx); + printf("Has AVX2 0x%x\n", has_avx2); + printf("Has ERMS 0x%x\n", has_erms); + printf("Has FMA3 0x%x\n", has_fma3); + printf("Has F16C 0x%x\n", has_f16c); + printf("Has AVX512BW 0x%x\n", has_avx512bw); + printf("Has AVX512VL 0x%x\n", has_avx512vl); + printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); + printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); + printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); + printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); + printf("Has AVX10 
0x%x\n", has_avx10); + printf("HAS AVXVNNI 0x%x\n", has_avxvnni); + printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); +#endif #if defined(__mips__) int has_mips = TestCpuFlag(kCpuHasMIPS); - printf("Has MIPS %d\n", has_mips); + printf("Has MIPS 0x%x\n", has_mips); int has_msa = TestCpuFlag(kCpuHasMSA); - printf("Has MSA %d\n", has_msa); + printf("Has MSA 0x%x\n", has_msa); #endif - #if defined(__loongarch__) int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH); - printf("Has LOONGARCH %d\n", has_loongarch); + printf("Has LOONGARCH 0x%x\n", has_loongarch); int has_lsx = TestCpuFlag(kCpuHasLSX); - printf("Has LSX %d\n", has_lsx); + printf("Has LSX 0x%x\n", has_lsx); int has_lasx = TestCpuFlag(kCpuHasLASX); - printf("Has LASX %d\n", has_lasx); + printf("Has LASX 0x%x\n", has_lasx); #endif } @@ -104,27 +115,36 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef __i386__ printf("__i386__ %d\n", __i386__); #endif -#ifdef __mips - printf("__mips %d\n", __mips); -#endif -#ifdef __mips_isa_rev - printf("__mips_isa_rev %d\n", __mips_isa_rev); -#endif #ifdef __x86_64__ printf("__x86_64__ %d\n", __x86_64__); #endif +#ifdef _M_IX86 + printf("_M_IX86 %d\n", _M_IX86); +#endif +#ifdef _M_X64 + printf("_M_X64 %d\n", _M_X64); +#endif #ifdef _MSC_VER printf("_MSC_VER %d\n", _MSC_VER); #endif #ifdef __aarch64__ printf("__aarch64__ %d\n", __aarch64__); #endif -#ifdef __APPLE__ - printf("__APPLE__ %d\n", __APPLE__); -#endif #ifdef __arm__ printf("__arm__ %d\n", __arm__); #endif +#ifdef __riscv + printf("__riscv %d\n", __riscv); +#endif +#ifdef __riscv_vector + printf("__riscv_vector %d\n", __riscv_vector); +#endif +#ifdef __riscv_v_intrinsic + printf("__riscv_v_intrinsic %d\n", __riscv_v_intrinsic); +#endif +#ifdef __APPLE__ + printf("__APPLE__ %d\n", __APPLE__); +#endif #ifdef __clang__ printf("__clang__ %d\n", __clang__); #endif @@ -140,20 +160,11 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef __mips_msa printf("__mips_msa %d\n", __mips_msa); #endif -#ifdef __native_client__ 
- printf("__native_client__ %d\n", __native_client__); -#endif -#ifdef __pic__ - printf("__pic__ %d\n", __pic__); -#endif -#ifdef __pnacl__ - printf("__pnacl__ %d\n", __pnacl__); -#endif -#ifdef _M_IX86 - printf("_M_IX86 %d\n", _M_IX86); +#ifdef __mips + printf("__mips %d\n", __mips); #endif -#ifdef _M_X64 - printf("_M_X64 %d\n", _M_X64); +#ifdef __mips_isa_rev + printf("__mips_isa_rev %d\n", __mips_isa_rev); #endif #ifdef _MIPS_ARCH_LOONGSON3A printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A); @@ -164,8 +175,17 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef _WIN32 printf("_WIN32 %d\n", _WIN32); #endif +#ifdef __native_client__ + printf("__native_client__ %d\n", __native_client__); +#endif +#ifdef __pic__ + printf("__pic__ %d\n", __pic__); +#endif +#ifdef __pnacl__ + printf("__pnacl__ %d\n", __pnacl__); +#endif #ifdef GG_LONGLONG - printf("GG_LONGLONG %d\n", GG_LONGLONG); + printf("GG_LONGLONG %lld\n", GG_LONGLONG(1)); #endif #ifdef INT_TYPES_DEFINED printf("INT_TYPES_DEFINED\n"); @@ -200,8 +220,9 @@ TEST_F(LibYUVBaseTest, TestCpuId) { cpu_info[0] = cpu_info[1]; // Reorder output cpu_info[1] = cpu_info[3]; cpu_info[3] = 0; - printf("Cpu Vendor: %s %x %x %x\n", reinterpret_cast<char*>(&cpu_info[0]), - cpu_info[0], cpu_info[1], cpu_info[2]); + printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n", + reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1], + cpu_info[2]); EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0]))); // CPU Family and Model @@ -264,6 +285,32 @@ TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) { } } +TEST_F(LibYUVBaseTest, TestLinuxRVV) { + if (FileExists("../../unit_test/testdata/riscv64.txt")) { + printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n"); + + EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt")); + EXPECT_EQ(kCpuHasRVV, + RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt")); + EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH, + 
RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt")); + } else { + printf( + "WARNING: unable to load " + "\"../../unit_test/testdata/riscv64.txt\"\n"); + } +#if defined(__linux__) && defined(__riscv) + if (FileExists("/proc/cpuinfo")) { + if (!(kCpuHasRVV & RiscvCpuCaps("/proc/cpuinfo"))) { + // This can happen on RVV emulator but /proc/cpuinfo is from host. + printf("WARNING: RVV build enabled but CPU does not have RVV\n"); + } + } else { + printf("WARNING: unable to load \"/proc/cpuinfo\"\n"); + } +#endif +} + // TODO(fbarchard): Fix clangcl test of cpuflags. #ifdef _MSC_VER TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) { diff --git a/files/unit_test/cpu_thread_test.cc b/unit_test/cpu_thread_test.cc index 69aab74e..69aab74e 100644 --- a/files/unit_test/cpu_thread_test.cc +++ b/unit_test/cpu_thread_test.cc diff --git a/files/unit_test/math_test.cc b/unit_test/math_test.cc index a1544c12..a1544c12 100644 --- a/files/unit_test/math_test.cc +++ b/unit_test/math_test.cc diff --git a/files/unit_test/planar_test.cc b/unit_test/planar_test.cc index 3a8c470b..ec1d72eb 100644 --- a/files/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -30,9 +30,9 @@ #endif #if defined(LIBYUV_BIT_EXACT) -#define EXPECTED_ATTENUATE_DIFF 0 +#define EXPECTED_UNATTENUATE_DIFF 0 #else -#define EXPECTED_ATTENUATE_DIFF 2 +#define EXPECTED_UNATTENUATE_DIFF 2 #endif namespace libyuv { @@ -57,12 +57,17 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { orig_pixels[2 * 4 + 0] = 16u; orig_pixels[2 * 4 + 1] = 64u; orig_pixels[2 * 4 + 2] = 192u; - orig_pixels[2 * 4 + 3] = 255u; + orig_pixels[2 * 4 + 3] = 128u; orig_pixels[3 * 4 + 0] = 16u; orig_pixels[3 * 4 + 1] = 64u; orig_pixels[3 * 4 + 2] = 192u; - orig_pixels[3 * 4 + 3] = 128u; - ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1); + orig_pixels[3 * 4 + 3] = 255u; + orig_pixels[4 * 4 + 0] = 255u; + orig_pixels[4 * 4 + 1] = 255u; + orig_pixels[4 * 4 + 2] = 255u; + orig_pixels[4 * 4 + 3] = 255u; + + ARGBUnattenuate(orig_pixels, 
0, unatten_pixels, 0, 5, 1); EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]); EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]); EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]); @@ -71,14 +76,55 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]); EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]); EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]); - EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]); - EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]); - EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]); - EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]); - EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]); - EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]); - EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]); - EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]); + EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]); + EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]); + EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]); + EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]); + EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]); + + ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1); + EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]); + EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]); + EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]); + EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 3]); + EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]); + EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]); + EXPECT_EQ(96u, atten_pixels[2 * 4 + 2]); + EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]); + EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]); + EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]); + EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]); + EXPECT_EQ(255u, atten_pixels[3 * 
4 + 3]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]); + + // test 255 + for (int i = 0; i < 256; ++i) { + orig_pixels[i * 4 + 0] = i; + orig_pixels[i * 4 + 1] = 0; + orig_pixels[i * 4 + 2] = 0; + orig_pixels[i * 4 + 3] = 255; + } + ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1); + for (int i = 0; i < 256; ++i) { + EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]); + EXPECT_EQ(0, atten_pixels[i * 4 + 1]); + EXPECT_EQ(0, atten_pixels[i * 4 + 2]); + EXPECT_EQ(255, atten_pixels[i * 4 + 3]); + } for (int i = 0; i < 1280; ++i) { orig_pixels[i * 4 + 0] = i; @@ -92,10 +138,10 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1); } for (int i = 0; i < 1280; ++i) { - EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2); - EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2); - EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2); - EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2); + EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1); } // Make sure transparent, 50% and opaque are fully accurate. 
EXPECT_EQ(0, atten_pixels[0 * 4 + 0]); @@ -106,9 +152,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { EXPECT_EQ(32, atten_pixels[128 * 4 + 1]); EXPECT_EQ(21, atten_pixels[128 * 4 + 2]); EXPECT_EQ(128, atten_pixels[128 * 4 + 3]); - EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF); - EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF); - EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(255, atten_pixels[255 * 4 + 0]); + EXPECT_EQ(127, atten_pixels[255 * 4 + 1]); + EXPECT_EQ(85, atten_pixels[255 * 4 + 2]); EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); free_aligned_buffer_page_end(atten2_pixels); @@ -165,28 +211,28 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_EQ(max_diff, 0); } static int TestUnattenuateI(int width, @@ -238,28 +284,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, 
EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { @@ -1638,29 +1684,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int y_plane_size = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_y, orig_plane_size); + align_buffer_page_end(tile_y, tile_plane_size); align_buffer_page_end(dst_c, y_plane_size); align_buffer_page_end(dst_opt, y_plane_size); - MemRandomize(orig_y, orig_plane_size); + MemRandomize(tile_y, tile_plane_size); memset(dst_c, 0, y_plane_size); memset(dst_opt, 0, y_plane_size); // Disable all optimizations. 
MaskCpuFlags(disable_cpu_flags_); for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_, + DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_, benchmark_height_, 16); } // Enable optimizations. MaskCpuFlags(benchmark_cpu_info_); for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_, + DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_, benchmark_height_, 16); } @@ -1668,7 +1714,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { EXPECT_EQ(dst_c[i], dst_opt[i]); } - free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(tile_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestDetilePlane_16) { + int i, j; + + // orig is tiled. Allocate enough memory for tiles. + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height * 2; + int y_plane_size = benchmark_width_ * benchmark_height_ * 2; + align_buffer_page_end(tile_y, tile_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(tile_y, tile_plane_size); + memset(dst_c, 0, y_plane_size); + memset(dst_opt, 0, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info_); + for (j = 0; j < benchmark_iterations_; j++) { + DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt, + benchmark_width_, benchmark_width_, benchmark_height_, 16); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(tile_y); free_aligned_buffer_page_end(dst_c); free_aligned_buffer_page_end(dst_opt); } @@ -1678,33 +1763,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; - align_buffer_page_end(orig_uv, orig_plane_size); - align_buffer_page_end(detiled_uv, orig_plane_size); + align_buffer_page_end(tile_uv, tile_plane_size); + align_buffer_page_end(detiled_uv, tile_plane_size); align_buffer_page_end(dst_u_two_stage, uv_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size); align_buffer_page_end(dst_v_two_stage, uv_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size); - MemRandomize(orig_uv, orig_plane_size); - memset(detiled_uv, 0, orig_plane_size); + MemRandomize(tile_uv, tile_plane_size); + memset(detiled_uv, 0, tile_plane_size); memset(dst_u_two_stage, 0, uv_plane_size); memset(dst_u_opt, 0, uv_plane_size); memset(dst_v_two_stage, 0, uv_plane_size); memset(dst_v_opt, 0, uv_plane_size); - DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, + DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); // Benchmark 2 step conversion for comparison. 
for (j = 0; j < benchmark_iterations_; j++) { - DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, + DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_, benchmark_width_, benchmark_height_, 16); - SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage, + SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage, (benchmark_width_ + 1) / 2, dst_v_two_stage, (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2, benchmark_height_); @@ -1715,7 +1800,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); } - free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tile_uv); free_aligned_buffer_page_end(detiled_uv); free_aligned_buffer_page_end(dst_u_two_stage); free_aligned_buffer_page_end(dst_u_opt); @@ -1727,17 +1812,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { int i, j; // orig is tiled. Allocate enough memory for tiles. - int orig_width = (benchmark_width_ + 15) & ~15; - int orig_height = (benchmark_height_ + 15) & ~15; - int orig_plane_size = orig_width * orig_height; + int tile_width = (benchmark_width_ + 15) & ~15; + int tile_height = (benchmark_height_ + 15) & ~15; + int tile_plane_size = tile_width * tile_height; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; - align_buffer_page_end(orig_uv, orig_plane_size); + align_buffer_page_end(tile_uv, tile_plane_size); align_buffer_page_end(dst_u_c, uv_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size); align_buffer_page_end(dst_v_c, uv_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size); - MemRandomize(orig_uv, orig_plane_size); + MemRandomize(tile_uv, tile_plane_size); memset(dst_u_c, 0, uv_plane_size); memset(dst_u_opt, 0, uv_plane_size); memset(dst_v_c, 0, uv_plane_size); @@ -1746,7 +1831,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { // Disable all optimizations. 
MaskCpuFlags(disable_cpu_flags_); - DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2, + DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2, dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); @@ -1755,7 +1840,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { for (j = 0; j < benchmark_iterations_; j++) { DetileSplitUVPlane( - orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, + tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); } @@ -1764,7 +1849,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); } - free_aligned_buffer_page_end(orig_uv); + free_aligned_buffer_page_end(tile_uv); free_aligned_buffer_page_end(dst_u_c); free_aligned_buffer_page_end(dst_u_opt); free_aligned_buffer_page_end(dst_v_c); @@ -2710,12 +2795,23 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { MaskCpuFlags(disable_cpu_flags_); ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c, benchmark_width_, benchmark_width_, benchmark_height_); - MaskCpuFlags(benchmark_cpu_info_); + double c_time = get_time(); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c, + benchmark_width_, benchmark_width_, benchmark_height_); + c_time = (get_time() - c_time); + MaskCpuFlags(benchmark_cpu_info_); + ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt, + benchmark_width_, benchmark_width_, benchmark_height_); + double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) { ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt, benchmark_width_, benchmark_width_, benchmark_height_); } + opt_time = (get_time() - opt_time) / benchmark_iterations_; + // Report performance of C vs OPT + printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6), + static_cast<int>(opt_time * 1e6)); for (int i 
= 0; i < kPixels; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } @@ -2738,12 +2834,24 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { MaskCpuFlags(disable_cpu_flags_); ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - MaskCpuFlags(benchmark_cpu_info_); + double c_time = get_time(); + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + c_time = (get_time() - c_time); + MaskCpuFlags(benchmark_cpu_info_); + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, + benchmark_width_ * 4, benchmark_width_, benchmark_height_); + double opt_time = get_time(); for (int i = 0; i < benchmark_iterations_; ++i) { ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, benchmark_height_); } + opt_time = (get_time() - opt_time) / benchmark_iterations_; + + // Report performance of C vs OPT + printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6), + static_cast<int>(opt_time * 1e6)); for (int i = 0; i < kPixels * 4; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } @@ -3495,8 +3603,8 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16) // TODO(fbarchard): improve test for platforms and cpu detect #ifdef HAS_MERGEUVROW_16_AVX2 TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { - // Round count up to multiple of 16 - const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15; + // Round count up to multiple of 8 + const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7; align_buffer_page_end(src_pixels_u, kPixels * 2); align_buffer_page_end(src_pixels_v, kPixels * 2); @@ -4429,4 +4537,83 @@ TEST_F(LibYUVPlanarTest, NV21Copy) { free_aligned_buffer_page_end(dst_vu); } +#if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \ + defined(__aarch64__) + +TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) { + int i, j; + const int 
y_plane_size = benchmark_width_ * benchmark_height_; + + align_buffer_page_end(orig_f, y_plane_size * 4); + align_buffer_page_end(orig_y, y_plane_size * 2); + align_buffer_page_end(dst_opt, y_plane_size * 4); + align_buffer_page_end(rec_opt, y_plane_size * 2); + + for (i = 0; i < y_plane_size; ++i) { + ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f; + } + memset(orig_y, 1, y_plane_size * 2); + memset(dst_opt, 2, y_plane_size * 4); + memset(rec_opt, 3, y_plane_size * 2); + + ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y, + y_plane_size); + + for (j = 0; j < benchmark_iterations_; j++) { + ConvertFP16ToFP32Row_NEON((const uint16_t*)orig_y, (float*)dst_opt, + y_plane_size); + } + + ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt, + y_plane_size); + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); + } + + free_aligned_buffer_page_end(orig_f); + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(rec_opt); +} + +TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) { + int i, j; + const int y_plane_size = benchmark_width_ * benchmark_height_; + + align_buffer_page_end(orig_f, y_plane_size * 4); + align_buffer_page_end(orig_y, y_plane_size * 2); + align_buffer_page_end(dst_opt, y_plane_size * 4); + align_buffer_page_end(rec_opt, y_plane_size * 2); + + for (i = 0; i < y_plane_size; ++i) { + ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f; + } + memset(orig_y, 1, y_plane_size * 2); + memset(dst_opt, 2, y_plane_size * 4); + memset(rec_opt, 3, y_plane_size * 2); + + ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y, + y_plane_size); + + for (j = 0; j < benchmark_iterations_; j++) { + ConvertFP16ToFP32Column_NEON((const uint16_t*)orig_y, 1, (float*)dst_opt, + y_plane_size); + } + + ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt, + y_plane_size); + + for (i = 0; i < 
y_plane_size; ++i) { + EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); + } + + free_aligned_buffer_page_end(orig_f); + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_opt); + free_aligned_buffer_page_end(rec_opt); +} + +#endif // defined(ENABLE_ROW_TESTS) && defined(__aarch64__) + } // namespace libyuv diff --git a/files/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc index 01ed69ca..74952c4e 100644 --- a/files/unit_test/rotate_argb_test.cc +++ b/unit_test/rotate_argb_test.cc @@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) { free_aligned_buffer_page_end(src_argb); } +static void TestRotatePlane_16(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height < 1) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_stride = src_width; + int src_plane_size = src_stride * abs(src_height); + align_buffer_page_end_16(src, src_plane_size); + for (int i = 0; i < src_plane_size; ++i) { + src[i] = fastrand() & 0xff; + } + + int dst_stride = dst_width; + int dst_plane_size = dst_stride * dst_height; + align_buffer_page_end_16(dst_c, dst_plane_size); + align_buffer_page_end_16(dst_opt, dst_plane_size); + memset(dst_c, 2, dst_plane_size); + memset(dst_opt, 3, dst_plane_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height, + mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height, + mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_c); + free_aligned_buffer_page_end_16(dst_opt); + free_aligned_buffer_page_end_16(src); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) { + TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_height_ + 1, benchmark_width_ + 1, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, + benchmark_width_ + 1, benchmark_height_ + 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) { + TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1, 
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + } // namespace libyuv diff --git a/files/unit_test/rotate_test.cc b/unit_test/rotate_test.cc index d3887414..abc08efa 100644 --- a/files/unit_test/rotate_test.cc +++ b/unit_test/rotate_test.cc @@ -14,6 +14,10 @@ #include "libyuv/cpu_id.h" #include "libyuv/rotate.h" +#ifdef ENABLE_ROW_TESTS +#include "libyuv/rotate_row.h" +#endif + namespace libyuv { #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) @@ -596,4 +600,363 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2) #undef TESTAPLANARTOP #undef TESTAPLANARTOPI +static void I010TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i010_y_size = src_width * Abs(src_height); + int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2); + int src_i010_size = src_i010_y_size + src_i010_uv_size * 2; + align_buffer_page_end_16(src_i010, src_i010_size); + for (int i = 0; i < src_i010_size; ++i) { + src_i010[i] = fastrand() & 0x3ff; + } + + int dst_i010_y_size = dst_width * dst_height; + int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2); + int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2; + align_buffer_page_end_16(dst_i010_c, dst_i010_size); + align_buffer_page_end_16(dst_i010_opt, dst_i010_size); + memset(dst_i010_c, 2, dst_i010_size * 2); + memset(dst_i010_opt, 3, dst_i010_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
+ I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size, + (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size, + (src_width + 1) / 2, dst_i010_c, dst_width, + dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2, + dst_i010_c + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I010Rotate( + src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2, + src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2, + dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size, + (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i010_size; ++i) { + EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i010_c); + free_aligned_buffer_page_end_16(dst_i010_opt); + free_aligned_buffer_page_end_16(src_i010); +} + +TEST_F(LibYUVRotateTest, I010Rotate0_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate90_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate180_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I010Rotate270_Opt) { + I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I210TestRotate(int 
src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i210_y_size = src_width * Abs(src_height); + int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height); + int src_i210_size = src_i210_y_size + src_i210_uv_size * 2; + align_buffer_page_end_16(src_i210, src_i210_size); + for (int i = 0; i < src_i210_size; ++i) { + src_i210[i] = fastrand() & 0x3ff; + } + + int dst_i210_y_size = dst_width * dst_height; + int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height; + int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2; + align_buffer_page_end_16(dst_i210_c, dst_i210_size); + align_buffer_page_end_16(dst_i210_opt, dst_i210_size); + memset(dst_i210_c, 2, dst_i210_size * 2); + memset(dst_i210_opt, 3, dst_i210_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size, + (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size, + (src_width + 1) / 2, dst_i210_c, dst_width, + dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2, + dst_i210_c + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I210Rotate( + src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2, + src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2, + dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size, + (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size, + (dst_width + 1) / 2, src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i210_size; ++i) { + EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i210_c); + free_aligned_buffer_page_end_16(dst_i210_opt); + free_aligned_buffer_page_end_16(src_i210); +} + +TEST_F(LibYUVRotateTest, I210Rotate0_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate90_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate180_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I210Rotate270_Opt) { + I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +static void I410TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i410_y_size = src_width * Abs(src_height); + int src_i410_uv_size = src_width * Abs(src_height); + int src_i410_size = src_i410_y_size + src_i410_uv_size * 2; + align_buffer_page_end_16(src_i410, src_i410_size); + for (int i = 0; i < src_i410_size; ++i) { + src_i410[i] = fastrand() & 0x3ff; + } + + int dst_i410_y_size = dst_width * dst_height; + int dst_i410_uv_size = dst_width * dst_height; + int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2; + 
align_buffer_page_end_16(dst_i410_c, dst_i410_size); + align_buffer_page_end_16(dst_i410_opt, dst_i410_size); + memset(dst_i410_c, 2, dst_i410_size * 2); + memset(dst_i410_opt, 3, dst_i410_size * 2); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width, + dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width, + src_i410 + src_i410_y_size + src_i410_uv_size, src_width, + dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size, + dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. 
+ for (int i = 0; i < dst_i410_size; ++i) { + EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]); + } + + free_aligned_buffer_page_end_16(dst_i410_c); + free_aligned_buffer_page_end_16(dst_i410_opt); + free_aligned_buffer_page_end_16(src_i410); +} + +TEST_F(LibYUVRotateTest, I410Rotate0_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate90_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate180_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I410Rotate270_Opt) { + I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +#if defined(ENABLE_ROW_TESTS) + +TEST_F(LibYUVRotateTest, Transpose4x4_Test) { + // dst width and height + const int width = 4; + const int height = 4; + int src_pixels[4][4]; + int dst_pixels_c[4][4]; + int dst_pixels_opt[4][4]; + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + src_pixels[i][j] = i * 10 + j; + } + } + memset(dst_pixels_c, 1, width * height * 4); + memset(dst_pixels_opt, 2, width * height * 4); + + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_c, width * 4, width); + + const int benchmark_iterations = + (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) / + (4 * 4); + for (int i = 0; i < benchmark_iterations; ++i) { +#if defined(HAS_TRANSPOSE4X4_32_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4, + 
(uint8_t*)dst_pixels_opt, width * 4, width); + } else +#elif defined(HAS_TRANSPOSE4X4_32_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#endif + { + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } + } + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]); + EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]); + } + } +} + +TEST_F(LibYUVRotateTest, Transpose4x4_Opt) { + // dst width and height + const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3; + const int height = 4; + align_buffer_page_end(src_pixels, height * width * 4); + align_buffer_page_end(dst_pixels_c, width * height * 4); + align_buffer_page_end(dst_pixels_opt, width * height * 4); + + MemRandomize(src_pixels, height * width * 4); + memset(dst_pixels_c, 1, width * height * 4); + memset(dst_pixels_opt, 2, width * height * 4); + + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_c, width * 4, width); + + for (int i = 0; i < benchmark_iterations_; ++i) { +#if defined(HAS_TRANSPOSE4X4_32_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#elif defined(HAS_TRANSPOSE4X4_32_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else if (TestCpuFlag(kCpuHasSSE2)) { + Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } else +#endif + { + Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4, + (uint8_t*)dst_pixels_opt, width * 4, width); + } + } + + for (int i = 0; i < width * height; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + 
free_aligned_buffer_page_end(src_pixels); + free_aligned_buffer_page_end(dst_pixels_c); + free_aligned_buffer_page_end(dst_pixels_opt); +} + +#endif // ENABLE_ROW_TESTS + } // namespace libyuv diff --git a/files/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index f54a68f1..f54a68f1 100644 --- a/files/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc new file mode 100644 index 00000000..9ce47a02 --- /dev/null +++ b/unit_test/scale_plane_test.cc @@ -0,0 +1,470 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <time.h> + +#include "../unit_test/unit_test.h" +#include "libyuv/cpu_id.h" +#include "libyuv/scale.h" + +#ifdef ENABLE_ROW_TESTS +#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C +#endif + +#define STRINGIZE(line) #line +#define FILELINESTR(file, line) file ":" STRINGIZE(line) + +#if defined(__riscv) && !defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#undef ENABLE_ROW_TESTS +#define LEAN_TESTS +#endif + +#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) +// SLOW TESTS are those that are unoptimized C code. +// FULL TESTS are optimized but test many variations of the same code. 
+#define ENABLE_FULL_TESTS +#endif + +namespace libyuv { + +#ifdef ENABLE_ROW_TESTS +#ifdef HAS_SCALEROWDOWN2_SSSE3 +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { + SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); + SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); + SIMD_ALIGNED(uint8_t dst_pixels_c[64]); + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); + memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + if (!has_ssse3) { + printf("Warning SSSE3 not detected; Skipping test.\n"); + } else { + // TL. + orig_pixels[0] = 255u; + orig_pixels[1] = 0u; + orig_pixels[128 + 0] = 0u; + orig_pixels[128 + 1] = 0u; + // TR. + orig_pixels[2] = 0u; + orig_pixels[3] = 100u; + orig_pixels[128 + 2] = 0u; + orig_pixels[128 + 3] = 0u; + // BL. + orig_pixels[4] = 0u; + orig_pixels[5] = 0u; + orig_pixels[128 + 4] = 50u; + orig_pixels[128 + 5] = 0u; + // BR. + orig_pixels[6] = 0u; + orig_pixels[7] = 0u; + orig_pixels[128 + 6] = 0u; + orig_pixels[128 + 7] = 20u; + // Odd. + orig_pixels[126] = 4u; + orig_pixels[127] = 255u; + orig_pixels[128 + 126] = 16u; + orig_pixels[128 + 127] = 255u; + + // Test regular half size. + ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(133u, dst_pixels_c[63]); + + // Test Odd width version - Last pixel is just 1 horizontal pixel. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(10u, dst_pixels_c[63]); + + // Test one pixel less, should skip the last pixel. 
+ memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); + + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(0u, dst_pixels_c[63]); + + // Test regular half size SSSE3. + ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + + EXPECT_EQ(64u, dst_pixels_opt[0]); + EXPECT_EQ(25u, dst_pixels_opt[1]); + EXPECT_EQ(13u, dst_pixels_opt[2]); + EXPECT_EQ(5u, dst_pixels_opt[3]); + EXPECT_EQ(0u, dst_pixels_opt[4]); + EXPECT_EQ(133u, dst_pixels_opt[63]); + + // Compare C and SSSE3 match. + ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); + ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); + for (int i = 0; i < 64; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + } +} +#endif // HAS_SCALEROWDOWN2_SSSE3 + +extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); + +TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { + SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); + SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); + SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); + + memset(orig_pixels, 0, sizeof(orig_pixels)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); + + for (int i = 0; i < 2560 * 2; ++i) { + orig_pixels[i] = i; + } + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } else { + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); + } +#else + ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); +#endif + } + + for (int i 
= 0; i < 1280; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); + EXPECT_EQ(dst_pixels_c[1279], 3839); +} +#endif // ENABLE_ROW_TESTS + +// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel +// difference. +// 0 = exact. +static int TestPlaneFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int src_stride_y = Abs(src_width); + int dst_y_plane_size = dst_width * dst_height; + int dst_stride_y = dst_width; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + + MemRandomize(src_y, src_y_plane_size); + memset(dst_y_8, 0, dst_y_plane_size); + memset(dst_y_16, 1, dst_y_plane_size * 2); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i] & 255; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, + dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + + for (i = 0; i < benchmark_iterations; ++i) { + ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, + dst_stride_y, dst_width, dst_height, f); + } + + // Expect an exact match. 
+ int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_y_16); + + return max_diff; +} + +// The following adjustments in dimensions ensure the scale factor will be +// exactly achieved. +// 2 is chroma subsample. +#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) +#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) + +#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ + TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \ + int diff = TestPlaneFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but +// filtering is different fixed point implementations for SSSE3, Neon and C. +#define TEST_FACTOR(name, nom, denom, boxdiff) \ + TEST_FACTOR1(name, None, nom, denom, 0) \ + TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ + TEST_FACTOR1(name, Box, nom, denom, boxdiff) + +TEST_FACTOR(2, 1, 2, 0) +TEST_FACTOR(4, 1, 4, 0) +// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds. 
+TEST_FACTOR(3by4, 3, 4, 1) +TEST_FACTOR(3by8, 3, 8, 1) +TEST_FACTOR(3, 1, 3, 0) +#undef TEST_FACTOR1 +#undef TEST_FACTOR +#undef SX +#undef DX + +TEST_F(LibYUVScaleTest, PlaneTest3x) { + const int kSrcStride = 480; + const int kDstStride = 160; + const int kSize = kSrcStride * 3; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 480 * 3; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(225, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(225, dest_pixels[0]); + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest4x) { + const int kSrcStride = 640; + const int kDstStride = 160; + const int kSize = kSrcStride * 4; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < 640 * 4; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_pixels, kDstStride); + + int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * + benchmark_iterations_; + for (int i = 0; i < iterations160; ++i) { + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterBilinear); + } + + EXPECT_EQ(66, dest_pixels[0]); + + ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, + kFilterNone); + + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + + free_aligned_buffer_page_end(dest_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterNone); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterNone); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBilinear); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
+ + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBilinear); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +// Intent is to test 200x50 to 50x200 but width and height can be parameters. +TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { + const int kSize = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_pixels, kSize); + for (int i = 0; i < kSize; ++i) { + orig_pixels[i] = i; + } + align_buffer_page_end(dest_opt_pixels, kSize); + align_buffer_page_end(dest_c_pixels, kSize); + + MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBox); + MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. + + for (int i = 0; i < benchmark_iterations_; ++i) { + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBox); + } + + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + } + + free_aligned_buffer_page_end(dest_c_pixels); + free_aligned_buffer_page_end(dest_opt_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_Box) { + align_buffer_page_end(orig_pixels, 3); + align_buffer_page_end(dst_pixels, 3); + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. 
+ orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, + libyuv::kFilterBox); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { + align_buffer_page_end(orig_pixels_alloc, 3 * 2); + align_buffer_page_end(dst_pixels_alloc, 3 * 2); + uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; + uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane_16( + orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels_alloc); + free_aligned_buffer_page_end(orig_pixels_alloc); +} +} // namespace libyuv diff --git a/files/unit_test/scale_rgb_test.cc b/unit_test/scale_rgb_test.cc index 8296abe3..8296abe3 100644 --- a/files/unit_test/scale_rgb_test.cc +++ b/unit_test/scale_rgb_test.cc diff --git a/files/unit_test/scale_test.cc b/unit_test/scale_test.cc index a8c95268..6e3b9271 100644 --- a/files/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -22,6 +22,11 @@ #define STRINGIZE(line) #line #define FILELINESTR(file, line) file ":" STRINGIZE(line) +#if defined(__riscv) && 
!defined(__clang__) +#define DISABLE_SLOW_TESTS +#undef ENABLE_FULL_TESTS +#endif + #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) // SLOW TESTS are those that are unoptimized C code. // FULL TESTS are optimized but test many variations of the same code. @@ -1123,479 +1128,6 @@ TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3) TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3) #endif #endif - #undef TEST_SCALESWAPXY1 -#ifdef ENABLE_ROW_TESTS -#ifdef HAS_SCALEROWDOWN2_SSSE3 -TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { - SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]); - SIMD_ALIGNED(uint8_t dst_pixels_opt[64]); - SIMD_ALIGNED(uint8_t dst_pixels_c[64]); - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt)); - memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); - - int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - if (!has_ssse3) { - printf("Warning SSSE3 not detected; Skipping test.\n"); - } else { - // TL. - orig_pixels[0] = 255u; - orig_pixels[1] = 0u; - orig_pixels[128 + 0] = 0u; - orig_pixels[128 + 1] = 0u; - // TR. - orig_pixels[2] = 0u; - orig_pixels[3] = 100u; - orig_pixels[128 + 2] = 0u; - orig_pixels[128 + 3] = 0u; - // BL. - orig_pixels[4] = 0u; - orig_pixels[5] = 0u; - orig_pixels[128 + 4] = 50u; - orig_pixels[128 + 5] = 0u; - // BR. - orig_pixels[6] = 0u; - orig_pixels[7] = 0u; - orig_pixels[128 + 6] = 0u; - orig_pixels[128 + 7] = 20u; - // Odd. - orig_pixels[126] = 4u; - orig_pixels[127] = 255u; - orig_pixels[128 + 126] = 16u; - orig_pixels[128 + 127] = 255u; - - // Test regular half size. - ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(133u, dst_pixels_c[63]); - - // Test Odd width version - Last pixel is just 1 horizontal pixel. 
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(10u, dst_pixels_c[63]); - - // Test one pixel less, should skip the last pixel. - memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); - ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); - - EXPECT_EQ(64u, dst_pixels_c[0]); - EXPECT_EQ(25u, dst_pixels_c[1]); - EXPECT_EQ(13u, dst_pixels_c[2]); - EXPECT_EQ(5u, dst_pixels_c[3]); - EXPECT_EQ(0u, dst_pixels_c[4]); - EXPECT_EQ(0u, dst_pixels_c[63]); - - // Test regular half size SSSE3. - ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - - EXPECT_EQ(64u, dst_pixels_opt[0]); - EXPECT_EQ(25u, dst_pixels_opt[1]); - EXPECT_EQ(13u, dst_pixels_opt[2]); - EXPECT_EQ(5u, dst_pixels_opt[3]); - EXPECT_EQ(0u, dst_pixels_opt[4]); - EXPECT_EQ(133u, dst_pixels_opt[63]); - - // Compare C and SSSE3 match. - ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - for (int i = 0; i < 64; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - } -} -#endif // HAS_SCALEROWDOWN2_SSSE3 - -extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); -extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); - -TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { - SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun. 
- SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); - SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); - - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); - memset(dst_pixels_c, 2, sizeof(dst_pixels_c)); - - for (int i = 0; i < 640 * 2 + 1; ++i) { - orig_pixels[i] = i; - } - ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280); - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) - int has_neon = TestCpuFlag(kCpuHasNEON); - if (has_neon) { - ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); - } else { - ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); - } -#else - ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); -#endif - } - - for (int i = 0; i < 1280; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16); - EXPECT_EQ(dst_pixels_c[1279], 800); -} - -extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width); - -TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { - SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]); - SIMD_ALIGNED(uint16_t dst_pixels_c[1280]); - SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]); - - memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); - memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); - - for (int i = 0; i < 2560 * 2; ++i) { - orig_pixels[i] = i; - } - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280); - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) - int has_neon = TestCpuFlag(kCpuHasNEON); - if (has_neon) { - ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); - } else { - ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); - } -#else - 
ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280); -#endif - } - - for (int i = 0; i < 1280; ++i) { - EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); - } - - EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); - EXPECT_EQ(dst_pixels_c[1279], 3839); -} -#endif // ENABLE_ROW_TESTS - -// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel -// difference. -// 0 = exact. -static int TestPlaneFilter_16(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { - if (!SizeValid(src_width, src_height, dst_width, dst_height)) { - return 0; - } - - int i; - int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); - int src_stride_y = Abs(src_width); - int dst_y_plane_size = dst_width * dst_height; - int dst_stride_y = dst_width; - - align_buffer_page_end(src_y, src_y_plane_size); - align_buffer_page_end(src_y_16, src_y_plane_size * 2); - align_buffer_page_end(dst_y_8, dst_y_plane_size); - align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); - uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); - uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); - - MemRandomize(src_y, src_y_plane_size); - memset(dst_y_8, 0, dst_y_plane_size); - memset(dst_y_16, 1, dst_y_plane_size * 2); - - for (i = 0; i < src_y_plane_size; ++i) { - p_src_y_16[i] = src_y[i] & 255; - } - - MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. - ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y, - dst_width, dst_height, f); - MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. - - for (i = 0; i < benchmark_iterations; ++i) { - ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16, - dst_stride_y, dst_width, dst_height, f); - } - - // Expect an exact match. 
- int max_diff = 0; - for (i = 0; i < dst_y_plane_size; ++i) { - int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } - } - - free_aligned_buffer_page_end(dst_y_8); - free_aligned_buffer_page_end(dst_y_16); - free_aligned_buffer_page_end(src_y); - free_aligned_buffer_page_end(src_y_16); - - return max_diff; -} - -// The following adjustments in dimensions ensure the scale factor will be -// exactly achieved. -// 2 is chroma subsample. -#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2) -#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) - -#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ - TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \ - int diff = TestPlaneFilter_16( \ - SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ - DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ - kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ - EXPECT_LE(diff, max_diff); \ - } - -// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but -// filtering is different fixed point implementations for SSSE3, Neon and C. -#define TEST_FACTOR(name, nom, denom, boxdiff) \ - TEST_FACTOR1(name, None, nom, denom, 0) \ - TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \ - TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \ - TEST_FACTOR1(name, Box, nom, denom, boxdiff) - -TEST_FACTOR(2, 1, 2, 0) -TEST_FACTOR(4, 1, 4, 0) -// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds. 
-TEST_FACTOR(3by4, 3, 4, 1) -TEST_FACTOR(3by8, 3, 8, 1) -TEST_FACTOR(3, 1, 3, 0) -#undef TEST_FACTOR1 -#undef TEST_FACTOR -#undef SX -#undef DX - -TEST_F(LibYUVScaleTest, PlaneTest3x) { - const int kSrcStride = 480; - const int kDstStride = 160; - const int kSize = kSrcStride * 3; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < 480 * 3; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_pixels, kDstStride); - - int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * - benchmark_iterations_; - for (int i = 0; i < iterations160; ++i) { - ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, - kFilterBilinear); - } - - EXPECT_EQ(225, dest_pixels[0]); - - ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, - kFilterNone); - - EXPECT_EQ(225, dest_pixels[0]); - - free_aligned_buffer_page_end(dest_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest4x) { - const int kSrcStride = 640; - const int kDstStride = 160; - const int kSize = kSrcStride * 4; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < 640 * 4; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_pixels, kDstStride); - - int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * - benchmark_iterations_; - for (int i = 0; i < iterations160; ++i) { - ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, - kFilterBilinear); - } - - EXPECT_EQ(66, dest_pixels[0]); - - ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, - kFilterNone); - - EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row - - free_aligned_buffer_page_end(dest_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -// Intent is to test 200x50 to 50x200 but width and height can be parameters. 
-TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterNone); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterNone); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterBilinear); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
- - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterBilinear); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -// Intent is to test 200x50 to 50x200 but width and height can be parameters. -TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { - const int kSize = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_pixels, kSize); - for (int i = 0; i < kSize; ++i) { - orig_pixels[i] = i; - } - align_buffer_page_end(dest_opt_pixels, kSize); - align_buffer_page_end(dest_c_pixels, kSize); - - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, benchmark_height_, - benchmark_width_, kFilterBox); - MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, - benchmark_height_, dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, kFilterBox); - } - - for (int i = 0; i < kSize; ++i) { - EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); - } - - free_aligned_buffer_page_end(dest_c_pixels); - free_aligned_buffer_page_end(dest_opt_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_Box) { - align_buffer_page_end(orig_pixels, 3); - align_buffer_page_end(dst_pixels, 3); - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. 
- orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, - libyuv::kFilterBox); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { - align_buffer_page_end(orig_pixels_alloc, 3 * 2); - align_buffer_page_end(dst_pixels_alloc, 3 * 2); - uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; - uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. - orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane_16( - orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels_alloc); - free_aligned_buffer_page_end(orig_pixels_alloc); -} } // namespace libyuv diff --git a/files/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc index 3d524bef..dab217c9 100644 --- a/files/unit_test/scale_uv_test.cc +++ b/unit_test/scale_uv_test.cc @@ -39,55 +39,35 @@ static int UVTestFilter(int src_width, return 0; } - int i, j; - const int b = 0; // 128 to test for padding/stride. 
- int64_t src_uv_plane_size = - (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 2LL; - int src_stride_uv = (b * 2 + Abs(src_width)) * 2; + int i; + int64_t src_uv_plane_size = Abs(src_width) * Abs(src_height) * 2LL; + int src_stride_uv = Abs(src_width) * 2; + int64_t dst_uv_plane_size = dst_width * dst_height * 2LL; + int dst_stride_uv = dst_width * 2; align_buffer_page_end(src_uv, src_uv_plane_size); - if (!src_uv) { - printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); - return 0; - } - MemRandomize(src_uv, src_uv_plane_size); - - int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL; - int dst_stride_uv = (b * 2 + dst_width) * 2; - align_buffer_page_end(dst_uv_c, dst_uv_plane_size); align_buffer_page_end(dst_uv_opt, dst_uv_plane_size); - if (!dst_uv_c || !dst_uv_opt) { + + if (!src_uv || !dst_uv_c || !dst_uv_opt) { printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); return 0; } + MemRandomize(src_uv, src_uv_plane_size); memset(dst_uv_c, 2, dst_uv_plane_size); - memset(dst_uv_opt, 3, dst_uv_plane_size); - - // Warm up both versions for consistent benchmarks. - MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv, - dst_width, dst_height, f); - MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv, - dst_width, dst_height, f); + memset(dst_uv_opt, 123, dst_uv_plane_size); MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 
double c_time = get_time(); - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv, + UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_c, dst_stride_uv, dst_width, dst_height, f); - c_time = (get_time() - c_time); MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. double opt_time = get_time(); for (i = 0; i < benchmark_iterations; ++i) { - UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width, - src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv, - dst_width, dst_height, f); + UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_opt, + dst_stride_uv, dst_width, dst_height, f); } opt_time = (get_time() - opt_time) / benchmark_iterations; @@ -95,18 +75,11 @@ static int UVTestFilter(int src_width, printf("filter %d - %8d us C - %8d us OPT\n", f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); - // C version may be a little off from the optimized. Order of - // operations may introduce rounding somewhere. So do a difference - // of the buffers and look to see that the max difference isn't - // over 2. 
int max_diff = 0; - for (i = b; i < (dst_height + b); ++i) { - for (j = b * 2; j < (dst_width + b) * 2; ++j) { - int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] - - dst_uv_opt[(i * dst_stride_uv) + j]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_uv_c[i] - dst_uv_opt[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; } } @@ -121,28 +94,26 @@ static int UVTestFilter(int src_width, #define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom) #define SX(x, nom, denom) static_cast<int>((x / nom) * denom) -#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ +#define TEST_FACTOR1(name, filter, nom, denom) \ TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \ int diff = UVTestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - EXPECT_LE(diff, max_diff); \ + EXPECT_EQ(0, diff); \ } #if defined(ENABLE_FULL_TESTS) -// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but -// filtering is different fixed point implementations for SSSE3, Neon and C. -#define TEST_FACTOR(name, nom, denom) \ - TEST_FACTOR1(name, None, nom, denom, 0) \ - TEST_FACTOR1(name, Linear, nom, denom, 3) \ - TEST_FACTOR1(name, Bilinear, nom, denom, 3) \ - TEST_FACTOR1(name, Box, nom, denom, 3) +// Test a scale factor with all 4 filters. Expect exact for SIMD vs C. +#define TEST_FACTOR(name, nom, denom) \ + TEST_FACTOR1(name, None, nom, denom) \ + TEST_FACTOR1(name, Linear, nom, denom) \ + TEST_FACTOR1(name, Bilinear, nom, denom) \ + TEST_FACTOR1(name, Box, nom, denom) #else // Test a scale factor with Bilinear. 
-#define TEST_FACTOR(name, nom, denom) \ - TEST_FACTOR1(name, Bilinear, nom, denom, 3) +#define TEST_FACTOR(name, nom, denom) TEST_FACTOR1(name, Bilinear, nom, denom) #endif TEST_FACTOR(2, 1, 2) diff --git a/files/unit_test/testdata/arm_v7.txt b/unit_test/testdata/arm_v7.txt index 5d7dbd04..5d7dbd04 100644 --- a/files/unit_test/testdata/arm_v7.txt +++ b/unit_test/testdata/arm_v7.txt diff --git a/files/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt index dd465272..dd465272 100644 --- a/files/unit_test/testdata/juno.txt +++ b/unit_test/testdata/juno.txt diff --git a/files/unit_test/testdata/mips.txt b/unit_test/testdata/mips.txt index d9f28cbf..d9f28cbf 100644 --- a/files/unit_test/testdata/mips.txt +++ b/unit_test/testdata/mips.txt diff --git a/files/unit_test/testdata/mips_loongson2k.txt b/unit_test/testdata/mips_loongson2k.txt index 8a88d38f..8a88d38f 100644 --- a/files/unit_test/testdata/mips_loongson2k.txt +++ b/unit_test/testdata/mips_loongson2k.txt diff --git a/files/unit_test/testdata/mips_loongson3.txt b/unit_test/testdata/mips_loongson3.txt index 1f540b12..1f540b12 100644 --- a/files/unit_test/testdata/mips_loongson3.txt +++ b/unit_test/testdata/mips_loongson3.txt diff --git a/files/unit_test/testdata/mips_loongson_mmi.txt b/unit_test/testdata/mips_loongson_mmi.txt index 0f10b8bb..0f10b8bb 100644 --- a/files/unit_test/testdata/mips_loongson_mmi.txt +++ b/unit_test/testdata/mips_loongson_mmi.txt diff --git a/files/unit_test/testdata/mips_msa.txt b/unit_test/testdata/mips_msa.txt index ac930615..ac930615 100644 --- a/files/unit_test/testdata/mips_msa.txt +++ b/unit_test/testdata/mips_msa.txt diff --git a/unit_test/testdata/riscv64.txt b/unit_test/testdata/riscv64.txt new file mode 100644 index 00000000..fbb4200f --- /dev/null +++ b/unit_test/testdata/riscv64.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imac +mmu : sv48
\ No newline at end of file diff --git a/unit_test/testdata/riscv64_rvv.txt b/unit_test/testdata/riscv64_rvv.txt new file mode 100644 index 00000000..af1b3f36 --- /dev/null +++ b/unit_test/testdata/riscv64_rvv.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imafdcv +mmu : sv48
\ No newline at end of file diff --git a/unit_test/testdata/riscv64_rvv_zvfh.txt b/unit_test/testdata/riscv64_rvv_zvfh.txt new file mode 100644 index 00000000..c416c1af --- /dev/null +++ b/unit_test/testdata/riscv64_rvv_zvfh.txt @@ -0,0 +1,4 @@ +processor : 0 +hart : 1 +isa : rv64imafdcv_zfh_zvfh +mmu : sv48
\ No newline at end of file diff --git a/files/unit_test/testdata/tegra3.txt b/unit_test/testdata/tegra3.txt index d1b09f6b..d1b09f6b 100644 --- a/files/unit_test/testdata/tegra3.txt +++ b/unit_test/testdata/tegra3.txt diff --git a/files/unit_test/testdata/test0.jpg b/unit_test/testdata/test0.jpg Binary files differindex f4461a81..f4461a81 100644 --- a/files/unit_test/testdata/test0.jpg +++ b/unit_test/testdata/test0.jpg diff --git a/files/unit_test/testdata/test1.jpg b/unit_test/testdata/test1.jpg Binary files differindex a0210e9d..a0210e9d 100644 --- a/files/unit_test/testdata/test1.jpg +++ b/unit_test/testdata/test1.jpg diff --git a/files/unit_test/testdata/test2.jpg b/unit_test/testdata/test2.jpg Binary files differindex 816ca767..816ca767 100644 --- a/files/unit_test/testdata/test2.jpg +++ b/unit_test/testdata/test2.jpg diff --git a/files/unit_test/testdata/test3.jpg b/unit_test/testdata/test3.jpg Binary files differindex 792d91dc..792d91dc 100644 --- a/files/unit_test/testdata/test3.jpg +++ b/unit_test/testdata/test3.jpg diff --git a/files/unit_test/testdata/test4.jpg b/unit_test/testdata/test4.jpg Binary files differindex 1ef41668..1ef41668 100644 --- a/files/unit_test/testdata/test4.jpg +++ b/unit_test/testdata/test4.jpg diff --git a/files/unit_test/unit_test.cc b/unit_test/unit_test.cc index 61145a46..239d5b92 100644 --- a/files/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -88,6 +88,11 @@ int TestCpuEnv(int cpu_info) { cpu_info &= ~libyuv::kCpuHasLASX; } #endif +#if defined(__riscv) && defined(__linux__) + if (TestEnv("LIBYUV_DISABLE_RVV")) { + cpu_info &= ~libyuv::kCpuHasRVV; + } +#endif #if !defined(__pnacl__) && !defined(__CLR_VER) && \ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ defined(_M_IX86)) @@ -139,11 +144,14 @@ int TestCpuEnv(int cpu_info) { if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; } - if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { - cpu_info &= 
~libyuv::kCpuHasAVX512VPOPCNTDQ; + if (TestEnv("LIBYUV_DISABLE_AVX10")) { + cpu_info &= ~libyuv::kCpuHasAVX10; + } + if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) { + cpu_info &= ~libyuv::kCpuHasAVXVNNI; } - if (TestEnv("LIBYUV_DISABLE_GFNI")) { - cpu_info &= ~libyuv::kCpuHasGFNI; + if (TestEnv("LIBYUV_DISABLE_AVXVNNIINT8")) { + cpu_info &= ~libyuv::kCpuHasAVXVNNIINT8; } #endif if (TestEnv("LIBYUV_DISABLE_ASM")) { diff --git a/files/unit_test/unit_test.h b/unit_test/unit_test.h index 0a8df4d2..99cc8d19 100644 --- a/files/unit_test/unit_test.h +++ b/unit_test/unit_test.h @@ -11,10 +11,10 @@ #ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT #define UNIT_TEST_UNIT_TEST_H_ +#include <stddef.h> // For NULL #ifdef _WIN32 #include <windows.h> #else -#include <sys/resource.h> #include <sys/time.h> #endif @@ -77,7 +77,18 @@ static inline bool SizeValid(int src_width, #define free_aligned_buffer_page_end(var) \ free(var##_mem); \ - var = 0 + var = NULL + +#define align_buffer_page_end_16(var, size) \ + uint8_t* var##_mem = \ + reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \ + uint16_t* var = reinterpret_cast<uint16_t*>( \ + (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \ + ~63) + +#define free_aligned_buffer_page_end_16(var) \ + free(var##_mem); \ + var = NULL #ifdef WIN32 static inline double get_time() { diff --git a/files/unit_test/video_common_test.cc b/unit_test/video_common_test.cc index 36728ea9..36728ea9 100644 --- a/files/unit_test/video_common_test.cc +++ b/unit_test/video_common_test.cc diff --git a/files/util/Makefile b/util/Makefile index 40e74b65..40e74b65 100644 --- a/files/util/Makefile +++ b/util/Makefile diff --git a/files/util/color.cc b/util/color.cc index 8c3bbefd..8c3bbefd 100644 --- a/files/util/color.cc +++ b/util/color.cc diff --git a/files/util/compare.cc b/util/compare.cc index a16613ee..a16613ee 100644 --- a/files/util/compare.cc +++ b/util/compare.cc diff --git a/files/util/cpuid.c b/util/cpuid.c index 
b618bb10..c07e6e95 100644 --- a/files/util/cpuid.c +++ b/util/cpuid.c @@ -21,8 +21,9 @@ using namespace libyuv; int main(int argc, const char* argv[]) { int cpu_flags = TestCpuFlag(-1); int has_arm = TestCpuFlag(kCpuHasARM); - int has_mips = TestCpuFlag(kCpuHasMIPS); + int has_riscv = TestCpuFlag(kCpuHasRISCV); int has_x86 = TestCpuFlag(kCpuHasX86); + int has_mips = TestCpuFlag(kCpuHasMIPS); int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH); (void)argc; (void)argv; @@ -62,24 +63,28 @@ int main(int argc, const char* argv[]) { model, model); } #endif - printf("Cpu Flags %x\n", cpu_flags); - printf("Has ARM %x\n", has_arm); - printf("Has MIPS %x\n", has_mips); - printf("Has X86 %x\n", has_x86); - printf("Has LOONGARCH %x\n", has_loongarch); + printf("Cpu Flags 0x%x\n", cpu_flags); if (has_arm) { int has_neon = TestCpuFlag(kCpuHasNEON); - printf("Has NEON %x\n", has_neon); + printf("Has ARM 0x%x\n", has_arm); + printf("Has NEON 0x%x\n", has_neon); + } + if (has_riscv) { + int has_rvv = TestCpuFlag(kCpuHasRVV); + printf("Has RISCV 0x%x\n", has_riscv); + printf("Has RVV 0x%x\n", has_rvv); } if (has_mips) { int has_msa = TestCpuFlag(kCpuHasMSA); - printf("Has MSA %x\n", has_msa); + printf("Has MIPS 0x%x\n", has_mips); + printf("Has MSA 0x%x\n", has_msa); } if (has_loongarch) { int has_lsx = TestCpuFlag(kCpuHasLSX); - printf("Has LSX %x\n", has_lsx); int has_lasx = TestCpuFlag(kCpuHasLASX); - printf("Has LASX %x\n", has_lasx); + printf("Has LOONGARCH 0x%x\n", has_loongarch); + printf("Has LSX 0x%x\n", has_lsx); + printf("Has LASX 0x%x\n", has_lasx); } if (has_x86) { int has_sse2 = TestCpuFlag(kCpuHasSSE2); @@ -91,31 +96,34 @@ int main(int argc, const char* argv[]) { int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); int has_f16c = TestCpuFlag(kCpuHasF16C); - int has_gfni = TestCpuFlag(kCpuHasGFNI); int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW); int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL); int has_avx512vnni = 
TestCpuFlag(kCpuHasAVX512VNNI); int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); - printf("Has SSE2 %x\n", has_sse2); - printf("Has SSSE3 %x\n", has_ssse3); - printf("Has SSE4.1 %x\n", has_sse41); - printf("Has SSE4.2 %x\n", has_sse42); - printf("Has AVX %x\n", has_avx); - printf("Has AVX2 %x\n", has_avx2); - printf("Has ERMS %x\n", has_erms); - printf("Has FMA3 %x\n", has_fma3); - printf("Has F16C %x\n", has_f16c); - printf("Has GFNI %x\n", has_gfni); - printf("Has AVX512BW %x\n", has_avx512bw); - printf("Has AVX512VL %x\n", has_avx512vl); - printf("Has AVX512VNNI %x\n", has_avx512vnni); - printf("Has AVX512VBMI %x\n", has_avx512vbmi); - printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2); - printf("Has AVX512VBITALG %x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ %x\n", has_avx512vpopcntdq); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); + int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); + int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); + printf("Has X86 0x%x\n", has_x86); + printf("Has SSE2 0x%x\n", has_sse2); + printf("Has SSSE3 0x%x\n", has_ssse3); + printf("Has SSE4.1 0x%x\n", has_sse41); + printf("Has SSE4.2 0x%x\n", has_sse42); + printf("Has AVX 0x%x\n", has_avx); + printf("Has AVX2 0x%x\n", has_avx2); + printf("Has ERMS 0x%x\n", has_erms); + printf("Has FMA3 0x%x\n", has_fma3); + printf("Has F16C 0x%x\n", has_f16c); + printf("Has AVX512BW 0x%x\n", has_avx512bw); + printf("Has AVX512VL 0x%x\n", has_avx512vl); + printf("Has AVX512VNNI 0x%x\n", has_avx512vnni); + printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); + printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); + printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); + printf("Has AVX10 0x%x\n", has_avx10); + printf("HAS AVXVNNI 0x%x\n", has_avxvnni); + printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); } return 0; } 
diff --git a/files/util/i444tonv12_eg.cc b/util/i444tonv12_eg.cc index 0fcb4095..0fcb4095 100644 --- a/files/util/i444tonv12_eg.cc +++ b/util/i444tonv12_eg.cc diff --git a/files/util/psnr.cc b/util/psnr.cc index c7bee7f9..c7bee7f9 100644 --- a/files/util/psnr.cc +++ b/util/psnr.cc diff --git a/files/util/psnr.h b/util/psnr.h index aac128cb..aac128cb 100644 --- a/files/util/psnr.h +++ b/util/psnr.h diff --git a/files/util/psnr_main.cc b/util/psnr_main.cc index 8b9fd972..8b9fd972 100644 --- a/files/util/psnr_main.cc +++ b/util/psnr_main.cc diff --git a/files/util/ssim.cc b/util/ssim.cc index 096fbcf0..096fbcf0 100644 --- a/files/util/ssim.cc +++ b/util/ssim.cc diff --git a/files/util/ssim.h b/util/ssim.h index a855f1d1..a855f1d1 100644 --- a/files/util/ssim.h +++ b/util/ssim.h diff --git a/files/util/yuvconstants.c b/util/yuvconstants.c index 037e0824..4e5185af 100644 --- a/files/util/yuvconstants.c +++ b/util/yuvconstants.c @@ -43,9 +43,10 @@ // #define BR (-VR * 128 + YB) int main(int argc, const char* argv[]) { - if (argc < 2) { - printf("yuvconstants Kr Kb\n"); - printf(" MC BT KR = 0.2126; KB = 0.0722\n"); + if (argc < 3) { + printf("yuvconstants [KR] [KB]\n"); + printf(" e.g. yuvconstants 0.2126 0.0722\n"); + printf(" MC BT KR KB\n"); printf(" 1 BT.709 KR = 0.2126; KB = 0.0722\n"); printf(" 4 FCC KR = 0.30; KB = 0.11\n"); printf(" 6 BT.601 KR = 0.299; KB = 0.114\n"); @@ -53,8 +54,8 @@ int main(int argc, const char* argv[]) { printf(" 9 BT.2020 KR = 0.2627; KB = 0.0593\n"); return -1; } - float kr = atof(argv[1]); - float kb = atof(argv[2]); + float kr = (float)atof(argv[1]); + float kb = (float)atof(argv[2]); float kg = 1 - kr - kb; float vr = 2 * (1 - kr); diff --git a/files/util/yuvconvert.cc b/util/yuvconvert.cc index 332699e3..93b52668 100644 --- a/files/util/yuvconvert.cc +++ b/util/yuvconvert.cc @@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) { } // Parse PYUV format. 
ie name.1920x800_24Hz_P420.yuv -bool ExtractResolutionFromFilename(const char* name, - int* width_ptr, - int* height_ptr) { +static bool ExtractResolutionFromFilename(const char* name, + int* width_ptr, + int* height_ptr) { // Isolate the .width_height. section of the filename by searching for a // dot or underscore followed by a digit. for (int i = 0; name[i]; ++i) { @@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name, return false; } -void PrintHelp(const char* program) { +static void PrintHelp(const char* program) { printf("%s [-options] src_argb.raw dst_yuv.raw\n", program); printf( " -s <width> <height> .... specify source resolution. " @@ -78,7 +78,7 @@ void PrintHelp(const char* program) { exit(0); } -void ParseOptions(int argc, const char* argv[]) { +static void ParseOptions(int argc, const char* argv[]) { if (argc <= 1) { PrintHelp(argv[0]); } diff --git a/files/winarm.mk b/winarm.mk index b0a344ae..b0a344ae 100644 --- a/files/winarm.mk +++ b/winarm.mk |